[clang] [llvm] [OpenMP] Replace nvvm.annotation usage with kernel calling conventions (PR #122320)
Alex MacLean via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 9 09:31:50 PST 2025
https://github.com/AlexMaclean created https://github.com/llvm/llvm-project/pull/122320
Specifying a kernel with the `ptx_kernel` or `amdgpu_kernel` calling convention is more idiomatic and more compile-time performant than using the `nvvm.annotations !"kernel"` metadata.
Transition OMPIRBuilder to use calling conventions for PTX kernels and no longer emit `nvvm.annotations`. Update OpenMPOpt to work with kernels specified via calling convention as well as metadata. Update OpenMP tests to use the calling conventions.
>From 2c32b06fdd4c598f039f3bf53d9fb89d3cd4f478 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Thu, 9 Jan 2025 17:25:55 +0000
Subject: [PATCH] [OpenMP] Replace nvvm.annotation usage with kernel calling
conventions
---
clang/test/OpenMP/assumes_include_nvptx.cpp | 4 +-
.../nvptx_target_firstprivate_codegen.cpp | 2 +-
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 16 +-
llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 56 +-
.../Transforms/OpenMP/always_inline_device.ll | 17 +-
.../attributor_module_slice_reproducer.ll | 6 +-
.../test/Transforms/OpenMP/barrier_removal.ll | 146 +-
llvm/test/Transforms/OpenMP/bug66687.ll | 11 +-
.../OpenMP/custom_state_machines.ll | 81 +-
.../OpenMP/custom_state_machines_pre_lto.ll | 109 +-
.../OpenMP/custom_state_machines_remarks.ll | 7 +-
.../Transforms/OpenMP/deduplication_target.ll | 4 +-
.../get_hardware_num_threads_in_block_fold.ll | 13 +-
...dware_num_threads_in_block_fold_optnone.ll | 13 +-
.../Transforms/OpenMP/global_constructor.ll | 13 +-
.../OpenMP/globalization_remarks.ll | 4 +-
.../OpenMP/gpu_kernel_detection_remarks.ll | 9 +-
..._state_machine_function_ptr_replacement.ll | 4 +-
.../OpenMP/is_spmd_exec_mode_fold.ll | 17 +-
.../Transforms/OpenMP/nested_parallelism.ll | 13 +-
.../Transforms/OpenMP/parallel_level_fold.ll | 13 +-
.../Transforms/OpenMP/remove_globalization.ll | 26 +-
.../OpenMP/replace_globalization.ll | 21 +-
.../OpenMP/single_threaded_execution.ll | 4 +-
llvm/test/Transforms/OpenMP/spmdization.ll | 1607 +++--------------
.../Transforms/OpenMP/spmdization_assumes.ll | 27 +-
.../OpenMP/spmdization_constant_prop.ll | 2 -
.../Transforms/OpenMP/spmdization_guarding.ll | 106 +-
...mdization_guarding_two_reaching_kernels.ll | 36 +-
.../Transforms/OpenMP/spmdization_indirect.ll | 161 +-
...zation_no_guarding_two_reaching_kernels.ll | 35 +-
.../Transforms/OpenMP/spmdization_remarks.ll | 7 +-
.../OpenMP/value-simplify-openmp-opt.ll | 96 +-
33 files changed, 604 insertions(+), 2082 deletions(-)
diff --git a/clang/test/OpenMP/assumes_include_nvptx.cpp b/clang/test/OpenMP/assumes_include_nvptx.cpp
index 4577ea4c9c2b5e..c5040989a0e407 100644
--- a/clang/test/OpenMP/assumes_include_nvptx.cpp
+++ b/clang/test/OpenMP/assumes_include_nvptx.cpp
@@ -11,11 +11,11 @@
// TODO: Think about teaching the OMPIRBuilder about default attributes as well so the __kmpc* declarations are annotated.
-// CHECK: define weak_odr protected void @__omp_offloading_{{.*}}__Z17complex_reductionIfEvv_{{.*}}({{.*}}) [[attr0:#[0-9]]]
+// CHECK: define weak_odr protected ptx_kernel void @__omp_offloading_{{.*}}__Z17complex_reductionIfEvv_{{.*}}({{.*}}) [[attr0:#[0-9]]]
// CHECK: call i32 @__kmpc_target_init(
// CHECK: declare noundef float @_Z3sinf(float noundef) [[attr1:#[0-9]*]]
// CHECK: declare void @__kmpc_target_deinit(
-// CHECK: define weak_odr protected void @__omp_offloading_{{.*}}__Z17complex_reductionIdEvv_{{.*}}({{.*}}) [[attr0]]
+// CHECK: define weak_odr protected ptx_kernel void @__omp_offloading_{{.*}}__Z17complex_reductionIdEvv_{{.*}}({{.*}}) [[attr0]]
// CHECK: %call = call noundef double @_Z3sind(double noundef 0.000000e+00) [[attr2:#[0-9]]]
// CHECK: declare noundef double @_Z3sind(double noundef) [[attr1]]
diff --git a/clang/test/OpenMP/nvptx_target_firstprivate_codegen.cpp b/clang/test/OpenMP/nvptx_target_firstprivate_codegen.cpp
index d573f1cd193d64..94ace20826db4d 100644
--- a/clang/test/OpenMP/nvptx_target_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_firstprivate_codegen.cpp
@@ -90,7 +90,7 @@ int foo(int n, double *ptr) {
ptr[0]++;
}
- // TCHECK: define weak_odr protected void @__omp_offloading_{{.+}}(ptr {{[^,]+}}, ptr noundef [[PTR_IN:%.+]])
+ // TCHECK: define weak_odr protected ptx_kernel void @__omp_offloading_{{.+}}(ptr {{[^,]+}}, ptr noundef [[PTR_IN:%.+]])
// TCHECK: [[DYN_PTR_ADDR:%.+]] = alloca ptr,
// TCHECK: [[PTR_ADDR:%.+]] = alloca ptr,
// TCHECK-NOT: alloca ptr,
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 8dbf2aa7e0a243..487f886f9bdbfd 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -6404,6 +6404,8 @@ void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
OutlinedFn->setVisibility(GlobalValue::ProtectedVisibility);
if (T.isAMDGCN())
OutlinedFn->setCallingConv(CallingConv::AMDGPU_KERNEL);
+ else if (T.isNVPTX())
+ OutlinedFn->setCallingConv(CallingConv::PTX_Kernel);
}
}
@@ -9077,20 +9079,8 @@ void OpenMPIRBuilder::createOffloadEntry(Constant *ID, Constant *Addr,
if (!Fn)
return;
- Module &M = *(Fn->getParent());
- LLVMContext &Ctx = M.getContext();
-
- // Get "nvvm.annotations" metadata node.
- NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
-
- Metadata *MDVals[] = {
- ConstantAsMetadata::get(Fn), MDString::get(Ctx, "kernel"),
- ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Ctx), 1))};
- // Append metadata to nvvm.annotations.
- MD->addOperand(MDNode::get(Ctx, MDVals));
-
// Add a function attribute for the kernel.
- Fn->addFnAttr(Attribute::get(Ctx, "kernel"));
+ Fn->addFnAttr("kernel");
if (T.isAMDGCN())
Fn->addFnAttr("uniform-work-group-size", "true");
Fn->addFnAttr(Attribute::MustProgress);
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 67585e9c80ef4e..f495840c254d59 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -19,6 +19,7 @@
#include "llvm/Transforms/IPO/OpenMPOpt.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/EnumeratedArray.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
@@ -36,6 +37,7 @@
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Assumptions.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
@@ -5909,34 +5911,52 @@ bool llvm::omp::isOpenMPKernel(Function &Fn) {
return Fn.hasFnAttribute("kernel");
}
+static bool isKernelCC(Function &F) {
+ switch (F.getCallingConv()) {
+ default:
+ return false;
+ case CallingConv::PTX_Kernel:
+ case CallingConv::AMDGPU_KERNEL:
+ case CallingConv::SPIR_KERNEL:
+ return true;
+ }
+}
+
KernelSet llvm::omp::getDeviceKernels(Module &M) {
// TODO: Create a more cross-platform way of determining device kernels.
- NamedMDNode *MD = M.getNamedMetadata("nvvm.annotations");
KernelSet Kernels;
- if (!MD)
- return Kernels;
-
- for (auto *Op : MD->operands()) {
- if (Op->getNumOperands() < 2)
- continue;
- MDString *KindID = dyn_cast<MDString>(Op->getOperand(1));
- if (!KindID || KindID->getString() != "kernel")
- continue;
-
- Function *KernelFn =
- mdconst::dyn_extract_or_null<Function>(Op->getOperand(0));
- if (!KernelFn)
- continue;
+ DenseSet<const Function *> SeenKernels;
+ auto ProcessKernel = [&](Function &KF) {
+ if (SeenKernels.contains(&KF))
+ return;
+ SeenKernels.insert(&KF);
// We are only interested in OpenMP target regions. Others, such as kernels
// generated by CUDA but linked together, are not interesting to this pass.
- if (isOpenMPKernel(*KernelFn)) {
+ if (isOpenMPKernel(KF)) {
++NumOpenMPTargetRegionKernels;
- Kernels.insert(KernelFn);
+ Kernels.insert(&KF);
} else
++NumNonOpenMPTargetRegionKernels;
- }
+ };
+
+ if (NamedMDNode *MD = M.getNamedMetadata("nvvm.annotations"))
+ for (auto *Op : MD->operands()) {
+ if (Op->getNumOperands() < 2)
+ continue;
+ MDString *KindID = dyn_cast<MDString>(Op->getOperand(1));
+ if (!KindID || KindID->getString() != "kernel")
+ continue;
+
+ if (auto *KernelFn =
+ mdconst::dyn_extract_or_null<Function>(Op->getOperand(0)))
+ ProcessKernel(*KernelFn);
+ }
+
+ for (Function &F : M)
+ if (isKernelCC(F))
+ ProcessKernel(F);
return Kernels;
}
diff --git a/llvm/test/Transforms/OpenMP/always_inline_device.ll b/llvm/test/Transforms/OpenMP/always_inline_device.ll
index 6028ff5278037b..9c5b19f7a6c88c 100644
--- a/llvm/test/Transforms/OpenMP/always_inline_device.ll
+++ b/llvm/test/Transforms/OpenMP/always_inline_device.ll
@@ -17,7 +17,7 @@
; CHECK: @G = external global i8
; CHECK: @kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
;.
-define weak void @__omp_offloading_fd02_c0934fc2_foo_l4(ptr %dyn) #0 {
+define weak ptx_kernel void @__omp_offloading_fd02_c0934fc2_foo_l4(ptr %dyn) #0 {
; CHECK: Function Attrs: norecurse nounwind
; CHECK-LABEL: @__omp_offloading_fd02_c0934fc2_foo_l4(
; CHECK-NEXT: entry:
@@ -79,12 +79,10 @@ attributes #1 = { convergent nounwind "frame-pointer"="all" "min-legal-vector-wi
attributes #2 = { convergent }
!omp_offload.info = !{!0}
-!nvvm.annotations = !{!1}
!llvm.module.flags = !{!2, !3, !4, !5, !6}
!llvm.ident = !{!7}
!0 = !{i32 0, i32 64770, i32 -1064087614, !"foo", i32 4, i32 0}
-!1 = !{ptr @__omp_offloading_fd02_c0934fc2_foo_l4, !"kernel", i32 1}
!2 = !{i32 1, !"wchar_size", i32 4}
!3 = !{i32 7, !"openmp", i32 50}
!4 = !{i32 7, !"openmp-device", i32 50}
@@ -97,11 +95,10 @@ attributes #2 = { convergent }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nounwind }
;.
; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 -1064087614, !"foo", i32 4, i32 0}
-; CHECK: [[META1:![0-9]+]] = !{ptr @__omp_offloading_fd02_c0934fc2_foo_l4, !"kernel", i32 1}
-; CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK: [[META5:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; CHECK: [[META6:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; CHECK: [[META7:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+; CHECK: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; CHECK: [[META2:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; CHECK: [[META4:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
+; CHECK: [[META5:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
+; CHECK: [[META6:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
;.
diff --git a/llvm/test/Transforms/OpenMP/attributor_module_slice_reproducer.ll b/llvm/test/Transforms/OpenMP/attributor_module_slice_reproducer.ll
index 9c0416af359d4d..3f4790ee15ac8d 100644
--- a/llvm/test/Transforms/OpenMP/attributor_module_slice_reproducer.ll
+++ b/llvm/test/Transforms/OpenMP/attributor_module_slice_reproducer.ll
@@ -13,10 +13,6 @@ define linkonce_odr hidden i8 @_ZStplIdESt7complexIT_ERKS2_S4_() local_unnamed_a
ret i8 undef
}
-declare void @__omp_offloading_2b_4010cad__ZN11qmcplusplus7ompBLAS17gemv_batched_implIfEEiRiciiPKT_PKS5_iS7_iS5_PKPS3_ii_l148(i64, i64, i64, ptr, ptr, i64, ptr, ptr, ptr, i64) local_unnamed_addr
+declare ptx_kernel void @__omp_offloading_2b_4010cad__ZN11qmcplusplus7ompBLAS17gemv_batched_implIfEEiRiciiPKT_PKS5_iS7_iS5_PKPS3_ii_l148(i64, i64, i64, ptr, ptr, i64, ptr, ptr, ptr, i64) local_unnamed_addr
declare dso_local fastcc void @__kmpc_for_static_init_8u() unnamed_addr
-
-!nvvm.annotations = !{!0}
-
-!0 = !{ptr @__omp_offloading_2b_4010cad__ZN11qmcplusplus7ompBLAS17gemv_batched_implIfEEiRiciiPKT_PKS5_iS7_iS5_PKPS3_ii_l148, !"kernel", i32 1}
diff --git a/llvm/test/Transforms/OpenMP/barrier_removal.ll b/llvm/test/Transforms/OpenMP/barrier_removal.ll
index 47a5d5104aa8bd..5b7544b1a79616 100644
--- a/llvm/test/Transforms/OpenMP/barrier_removal.ll
+++ b/llvm/test/Transforms/OpenMP/barrier_removal.ll
@@ -28,7 +28,7 @@ declare void @llvm.assume(i1)
; CHECK: @G1 = global i32 42
; CHECK: @G2 = addrspace(1) global i32 0
;.
-define void @pos_empty_1(i1 %c) "kernel" {
+define amdgpu_kernel void @pos_empty_1(i1 %c) "kernel" {
; MODULE-LABEL: define {{[^@]+}}@pos_empty_1
; MODULE-SAME: (i1 [[C:%.*]]) #[[ATTR4:[0-9]+]] {
; MODULE-NEXT: ret void
@@ -45,7 +45,7 @@ define void @pos_empty_1(i1 %c) "kernel" {
call void @llvm.assume(i1 %c)
ret void
}
-define void @pos_empty_2() "kernel" {
+define amdgpu_kernel void @pos_empty_2() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@pos_empty_2
; CHECK-SAME: () #[[ATTR4:[0-9]+]] {
; CHECK-NEXT: ret void
@@ -53,7 +53,7 @@ define void @pos_empty_2() "kernel" {
call void @aligned_barrier()
ret void
}
-define void @pos_empty_3() "kernel" {
+define amdgpu_kernel void @pos_empty_3() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@pos_empty_3
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: ret void
@@ -61,7 +61,7 @@ define void @pos_empty_3() "kernel" {
call void @llvm.nvvm.barrier0()
ret void
}
-define void @pos_empty_4() "kernel" {
+define amdgpu_kernel void @pos_empty_4() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@pos_empty_4
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: ret void
@@ -69,7 +69,7 @@ define void @pos_empty_4() "kernel" {
call i32 @llvm.nvvm.barrier0.and(i32 0)
ret void
}
-define void @pos_empty_5() "kernel" {
+define amdgpu_kernel void @pos_empty_5() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@pos_empty_5
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: ret void
@@ -77,7 +77,7 @@ define void @pos_empty_5() "kernel" {
call i32 @llvm.nvvm.barrier0.or(i32 0)
ret void
}
-define void @pos_empty_6() "kernel" {
+define amdgpu_kernel void @pos_empty_6() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@pos_empty_6
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: ret void
@@ -85,7 +85,7 @@ define void @pos_empty_6() "kernel" {
call i32 @llvm.nvvm.barrier0.popc(i32 0)
ret void
}
-define void @pos_empty_7a() "kernel" {
+define amdgpu_kernel void @pos_empty_7a() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@pos_empty_7a
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: call void @unknown()
@@ -96,7 +96,7 @@ define void @pos_empty_7a() "kernel" {
ret void
}
; FIXME: We should remove the barrier.
-define void @pos_empty_7b() "kernel" {
+define amdgpu_kernel void @pos_empty_7b() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@pos_empty_7b
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: call void @unknown() #[[ATTR5:[0-9]+]]
@@ -109,7 +109,7 @@ define void @pos_empty_7b() "kernel" {
call void @unknown()
ret void
}
-define void @pos_empty_8(i1 %c) "kernel" {
+define amdgpu_kernel void @pos_empty_8(i1 %c) "kernel" {
; CHECK-LABEL: define {{[^@]+}}@pos_empty_8
; CHECK-SAME: (i1 [[C:%.*]]) #[[ATTR4]] {
; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
@@ -126,7 +126,7 @@ t:
f:
ret void
}
-define void @neg_empty_8() "kernel" {
+define amdgpu_kernel void @neg_empty_8() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@neg_empty_8
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: call void @unknown()
@@ -137,7 +137,7 @@ define void @neg_empty_8() "kernel" {
call void @llvm.amdgcn.s.barrier()
ret void
}
-define void @neg_empty_9(i1 %c) "kernel" {
+define amdgpu_kernel void @neg_empty_9(i1 %c) "kernel" {
; CHECK-LABEL: define {{[^@]+}}@neg_empty_9
; CHECK-SAME: (i1 [[C:%.*]]) #[[ATTR4]] {
; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
@@ -173,7 +173,7 @@ m:
ret void
}
; FIXME: We should remove the barrier
-define void @pos_empty_10() "kernel" {
+define amdgpu_kernel void @pos_empty_10() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@pos_empty_10
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: br label [[M:%.*]]
@@ -186,7 +186,7 @@ m:
call void @llvm.amdgcn.s.barrier()
ret void
}
-define void @pos_empty_11() "kernel" {
+define amdgpu_kernel void @pos_empty_11() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@pos_empty_11
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: br label [[M:%.*]]
@@ -206,7 +206,7 @@ define void @empty() {
ret void
}
; FIXME: We should remove the barrier in the end but not the first one.
-define void @neg_empty_12(i1 %c) "kernel" {
+define amdgpu_kernel void @neg_empty_12(i1 %c) "kernel" {
; MODULE-LABEL: define {{[^@]+}}@neg_empty_12
; MODULE-SAME: (i1 [[C:%.*]]) #[[ATTR4]] {
; MODULE-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
@@ -266,7 +266,7 @@ define void @neg_empty_2() "kernel" {
@GC1 = constant i32 42
@GC2 = addrspace(4) global i32 0
@GPtr4 = addrspace(4) global ptr addrspace(4) null
-define void @pos_constant_loads() "kernel" {
+define amdgpu_kernel void @pos_constant_loads() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@pos_constant_loads
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: [[ARG:%.*]] = load ptr addrspace(4), ptr addrspace(4) @GPtr4, align 8
@@ -296,7 +296,7 @@ define void @pos_constant_loads() "kernel" {
@GS = addrspace(3) global i32 0
@GPtr = global ptr null
; TODO: We could remove some of the barriers due to the lack of write effects.
-define void @neg_loads() "kernel" {
+define amdgpu_kernel void @neg_loads() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@neg_loads
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: [[ARG:%.*]] = load ptr, ptr @GPtr, align 8
@@ -327,7 +327,7 @@ define void @neg_loads() "kernel" {
@PG1 = thread_local global i32 42
@PG2 = addrspace(5) global i32 0
@GPtr5 = global ptr addrspace(5) null
-define void @pos_priv_mem() "kernel" {
+define amdgpu_kernel void @pos_priv_mem() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@pos_priv_mem
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: [[ARG:%.*]] = load ptr addrspace(5), ptr @GPtr5, align 4
@@ -358,7 +358,7 @@ define void @pos_priv_mem() "kernel" {
}
@G1 = global i32 42
@G2 = addrspace(1) global i32 0
-define void @neg_mem() "kernel" {
+define amdgpu_kernel void @neg_mem() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@neg_mem
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: [[ARG:%.*]] = load ptr, ptr @GPtr, align 8
@@ -388,7 +388,7 @@ define void @neg_mem() "kernel" {
ret void
}
-define void @pos_multiple() "kernel" {
+define amdgpu_kernel void @pos_multiple() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@pos_multiple
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: ret void
@@ -404,7 +404,7 @@ define void @pos_multiple() "kernel" {
ret void
}
-define void @multiple_blocks_kernel_1(i1 %c0, i1 %c1) "kernel" {
+define amdgpu_kernel void @multiple_blocks_kernel_1(i1 %c0, i1 %c1) "kernel" {
; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_kernel_1
; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]]) #[[ATTR4]] {
; CHECK-NEXT: br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]]
@@ -461,7 +461,7 @@ m:
ret void
}
-define void @multiple_blocks_kernel_2(i1 %c0, i1 %c1, ptr %p) "kernel" {
+define amdgpu_kernel void @multiple_blocks_kernel_2(i1 %c0, i1 %c1, ptr %p) "kernel" {
; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_kernel_2
; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) #[[ATTR4]] {
; CHECK-NEXT: store i32 4, ptr [[P]], align 4
@@ -727,7 +727,7 @@ define internal void @barrier_then_write_then_barrier0(ptr %p) {
call void @aligned_barrier()
ret void
}
-define void @multiple_blocks_functions_kernel_effects_0(i1 %c0, i1 %c1, ptr %p) "kernel" {
+define amdgpu_kernel void @multiple_blocks_functions_kernel_effects_0(i1 %c0, i1 %c1, ptr %p) "kernel" {
; MODULE-LABEL: define {{[^@]+}}@multiple_blocks_functions_kernel_effects_0
; MODULE-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) #[[ATTR4]] {
; MODULE-NEXT: call void @barrier_then_write_then_barrier0(ptr [[P]])
@@ -1040,7 +1040,7 @@ define internal void @callee_barrier() {
call void @aligned_barrier()
ret void
}
-define void @caller_barrier1() "kernel" {
+define amdgpu_kernel void @caller_barrier1() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@caller_barrier1
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: call void @callee_barrier()
@@ -1051,7 +1051,7 @@ define void @caller_barrier1() "kernel" {
call void @aligned_barrier()
ret void
}
-define void @caller_barrier2() "kernel" {
+define amdgpu_kernel void @caller_barrier2() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@caller_barrier2
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: call void @unknown()
@@ -1065,7 +1065,7 @@ define void @caller_barrier2() "kernel" {
ret void
}
-define void @loop_barrier() "kernel" {
+define amdgpu_kernel void @loop_barrier() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@loop_barrier
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: entry:
@@ -1095,7 +1095,7 @@ exit:
ret void
}
-define void @loop_barrier_end_barriers() "kernel" {
+define amdgpu_kernel void @loop_barrier_end_barriers() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@loop_barrier_end_barriers
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: entry:
@@ -1129,7 +1129,7 @@ exit:
ret void
}
-define void @loop_barrier_end_barriers_unknown() "kernel" {
+define amdgpu_kernel void @loop_barrier_end_barriers_unknown() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@loop_barrier_end_barriers_unknown
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: entry:
@@ -1165,7 +1165,7 @@ exit:
ret void
}
-define void @loop_barrier_store() "kernel" {
+define amdgpu_kernel void @loop_barrier_store() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@loop_barrier_store
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: entry:
@@ -1195,7 +1195,7 @@ exit:
ret void
}
-define void @loop_barrier_end_barriers_store() "kernel" {
+define amdgpu_kernel void @loop_barrier_end_barriers_store() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@loop_barrier_end_barriers_store
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: entry:
@@ -1232,37 +1232,7 @@ exit:
}
!llvm.module.flags = !{!16,!15}
-!nvvm.annotations = !{!0,!1,!2,!3,!4,!5,!6,!7,!8,!9,!10,!11,!12,!13,!14,!17,!18,!19,!20,!21,!22,!23,!24,!25,!26,!27,!28,!29,!30}
-!0 = !{ptr @pos_empty_1, !"kernel", i32 1}
-!1 = !{ptr @pos_empty_2, !"kernel", i32 1}
-!2 = !{ptr @pos_empty_3, !"kernel", i32 1}
-!3 = !{ptr @pos_empty_4, !"kernel", i32 1}
-!4 = !{ptr @pos_empty_5, !"kernel", i32 1}
-!5 = !{ptr @pos_empty_6, !"kernel", i32 1}
-!17 = !{ptr @pos_empty_7a, !"kernel", i32 1}
-!18 = !{ptr @pos_empty_7b, !"kernel", i32 1}
-!23 = !{ptr @pos_empty_8, !"kernel", i32 1}
-!24 = !{ptr @caller_barrier1, !"kernel", i32 1}
-!25 = !{ptr @caller_barrier2, !"kernel", i32 1}
-!26 = !{ptr @loop_barrier, !"kernel", i32 1}
-!27 = !{ptr @loop_barrier_end_barriers, !"kernel", i32 1}
-!28 = !{ptr @loop_barrier_end_barriers_unknown, !"kernel", i32 1}
-!29 = !{ptr @loop_barrier_store, !"kernel", i32 1}
-!30 = !{ptr @loop_barrier_end_barriers_store, !"kernel", i32 1}
-!6 = !{ptr @neg_empty_8, !"kernel", i32 1}
-!19 = !{ptr @neg_empty_9, !"kernel", i32 1}
-!20 = !{ptr @pos_empty_10, !"kernel", i32 1}
-!21 = !{ptr @pos_empty_11, !"kernel", i32 1}
-!22 = !{ptr @neg_empty_12, !"kernel", i32 1}
-!7 = !{ptr @pos_constant_loads, !"kernel", i32 1}
-!8 = !{ptr @neg_loads, !"kernel", i32 1}
-!9 = !{ptr @pos_priv_mem, !"kernel", i32 1}
-!10 = !{ptr @neg_mem, !"kernel", i32 1}
-!11 = !{ptr @pos_multiple, !"kernel", i32 1}
-!12 = !{ptr @multiple_blocks_kernel_1, !"kernel", i32 1}
-!13 = !{ptr @multiple_blocks_kernel_2, !"kernel", i32 1}
-!14 = !{ptr @multiple_blocks_functions_kernel_effects_0, !"kernel", i32 1}
!15 = !{i32 7, !"openmp", i32 50}
!16 = !{i32 7, !"openmp-device", i32 50}
;.
@@ -1282,65 +1252,7 @@ exit:
;.
; MODULE: [[META0:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; MODULE: [[META1:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; MODULE: [[META2:![0-9]+]] = !{ptr @pos_empty_1, !"kernel", i32 1}
-; MODULE: [[META3:![0-9]+]] = !{ptr @pos_empty_2, !"kernel", i32 1}
-; MODULE: [[META4:![0-9]+]] = !{ptr @pos_empty_3, !"kernel", i32 1}
-; MODULE: [[META5:![0-9]+]] = !{ptr @pos_empty_4, !"kernel", i32 1}
-; MODULE: [[META6:![0-9]+]] = !{ptr @pos_empty_5, !"kernel", i32 1}
-; MODULE: [[META7:![0-9]+]] = !{ptr @pos_empty_6, !"kernel", i32 1}
-; MODULE: [[META8:![0-9]+]] = !{ptr @neg_empty_8, !"kernel", i32 1}
-; MODULE: [[META9:![0-9]+]] = !{ptr @pos_constant_loads, !"kernel", i32 1}
-; MODULE: [[META10:![0-9]+]] = !{ptr @neg_loads, !"kernel", i32 1}
-; MODULE: [[META11:![0-9]+]] = !{ptr @pos_priv_mem, !"kernel", i32 1}
-; MODULE: [[META12:![0-9]+]] = !{ptr @neg_mem, !"kernel", i32 1}
-; MODULE: [[META13:![0-9]+]] = !{ptr @pos_multiple, !"kernel", i32 1}
-; MODULE: [[META14:![0-9]+]] = !{ptr @multiple_blocks_kernel_1, !"kernel", i32 1}
-; MODULE: [[META15:![0-9]+]] = !{ptr @multiple_blocks_kernel_2, !"kernel", i32 1}
-; MODULE: [[META16:![0-9]+]] = !{ptr @multiple_blocks_functions_kernel_effects_0, !"kernel", i32 1}
-; MODULE: [[META17:![0-9]+]] = !{ptr @pos_empty_7a, !"kernel", i32 1}
-; MODULE: [[META18:![0-9]+]] = !{ptr @pos_empty_7b, !"kernel", i32 1}
-; MODULE: [[META19:![0-9]+]] = !{ptr @neg_empty_9, !"kernel", i32 1}
-; MODULE: [[META20:![0-9]+]] = !{ptr @pos_empty_10, !"kernel", i32 1}
-; MODULE: [[META21:![0-9]+]] = !{ptr @pos_empty_11, !"kernel", i32 1}
-; MODULE: [[META22:![0-9]+]] = !{ptr @neg_empty_12, !"kernel", i32 1}
-; MODULE: [[META23:![0-9]+]] = !{ptr @pos_empty_8, !"kernel", i32 1}
-; MODULE: [[META24:![0-9]+]] = !{ptr @caller_barrier1, !"kernel", i32 1}
-; MODULE: [[META25:![0-9]+]] = !{ptr @caller_barrier2, !"kernel", i32 1}
-; MODULE: [[META26:![0-9]+]] = !{ptr @loop_barrier, !"kernel", i32 1}
-; MODULE: [[META27:![0-9]+]] = !{ptr @loop_barrier_end_barriers, !"kernel", i32 1}
-; MODULE: [[META28:![0-9]+]] = !{ptr @loop_barrier_end_barriers_unknown, !"kernel", i32 1}
-; MODULE: [[META29:![0-9]+]] = !{ptr @loop_barrier_store, !"kernel", i32 1}
-; MODULE: [[META30:![0-9]+]] = !{ptr @loop_barrier_end_barriers_store, !"kernel", i32 1}
;.
; CGSCC: [[META0:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; CGSCC: [[META1:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; CGSCC: [[META2:![0-9]+]] = !{ptr @pos_empty_1, !"kernel", i32 1}
-; CGSCC: [[META3:![0-9]+]] = !{ptr @pos_empty_2, !"kernel", i32 1}
-; CGSCC: [[META4:![0-9]+]] = !{ptr @pos_empty_3, !"kernel", i32 1}
-; CGSCC: [[META5:![0-9]+]] = !{ptr @pos_empty_4, !"kernel", i32 1}
-; CGSCC: [[META6:![0-9]+]] = !{ptr @pos_empty_5, !"kernel", i32 1}
-; CGSCC: [[META7:![0-9]+]] = !{ptr @pos_empty_6, !"kernel", i32 1}
-; CGSCC: [[META8:![0-9]+]] = !{ptr @neg_empty_8, !"kernel", i32 1}
-; CGSCC: [[META9:![0-9]+]] = !{ptr @pos_constant_loads, !"kernel", i32 1}
-; CGSCC: [[META10:![0-9]+]] = !{ptr @neg_loads, !"kernel", i32 1}
-; CGSCC: [[META11:![0-9]+]] = !{ptr @pos_priv_mem, !"kernel", i32 1}
-; CGSCC: [[META12:![0-9]+]] = !{ptr @neg_mem, !"kernel", i32 1}
-; CGSCC: [[META13:![0-9]+]] = !{ptr @pos_multiple, !"kernel", i32 1}
-; CGSCC: [[META14:![0-9]+]] = !{ptr @multiple_blocks_kernel_1, !"kernel", i32 1}
-; CGSCC: [[META15:![0-9]+]] = !{ptr @multiple_blocks_kernel_2, !"kernel", i32 1}
-; CGSCC: [[META16:![0-9]+]] = !{ptr @multiple_blocks_functions_kernel_effects_0, !"kernel", i32 1}
-; CGSCC: [[META17:![0-9]+]] = !{ptr @pos_empty_7a, !"kernel", i32 1}
-; CGSCC: [[META18:![0-9]+]] = !{ptr @pos_empty_7b, !"kernel", i32 1}
-; CGSCC: [[META19:![0-9]+]] = !{ptr @neg_empty_9, !"kernel", i32 1}
-; CGSCC: [[META20:![0-9]+]] = !{ptr @pos_empty_10, !"kernel", i32 1}
-; CGSCC: [[META21:![0-9]+]] = !{ptr @pos_empty_11, !"kernel", i32 1}
-; CGSCC: [[META22:![0-9]+]] = !{ptr @neg_empty_12, !"kernel", i32 1}
-; CGSCC: [[META23:![0-9]+]] = !{ptr @pos_empty_8, !"kernel", i32 1}
-; CGSCC: [[META24:![0-9]+]] = !{ptr @caller_barrier1, !"kernel", i32 1}
-; CGSCC: [[META25:![0-9]+]] = !{ptr @caller_barrier2, !"kernel", i32 1}
-; CGSCC: [[META26:![0-9]+]] = !{ptr @loop_barrier, !"kernel", i32 1}
-; CGSCC: [[META27:![0-9]+]] = !{ptr @loop_barrier_end_barriers, !"kernel", i32 1}
-; CGSCC: [[META28:![0-9]+]] = !{ptr @loop_barrier_end_barriers_unknown, !"kernel", i32 1}
-; CGSCC: [[META29:![0-9]+]] = !{ptr @loop_barrier_store, !"kernel", i32 1}
-; CGSCC: [[META30:![0-9]+]] = !{ptr @loop_barrier_end_barriers_store, !"kernel", i32 1}
;.
diff --git a/llvm/test/Transforms/OpenMP/bug66687.ll b/llvm/test/Transforms/OpenMP/bug66687.ll
index e0a9b825a88041..9bb069b1735bed 100644
--- a/llvm/test/Transforms/OpenMP/bug66687.ll
+++ b/llvm/test/Transforms/OpenMP/bug66687.ll
@@ -5,25 +5,22 @@ source_filename = "bug66687.ll"
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"
-define weak void @openmp_kernel() "kernel" {
-; CHECK-LABEL: define weak void @openmp_kernel(
+define weak ptx_kernel void @openmp_kernel() "kernel" {
+; CHECK-LABEL: define weak ptx_kernel void @openmp_kernel(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: ret void
;
ret void
}
-define weak_odr void @non_openmp_kernel() {
-; CHECK-LABEL: define weak_odr void @non_openmp_kernel() {
+define weak_odr ptx_kernel void @non_openmp_kernel() {
+; CHECK-LABEL: define weak_odr ptx_kernel void @non_openmp_kernel() {
; CHECK-NEXT: ret void
;
ret void
}
!llvm.module.flags = !{!0, !1}
-!nvvm.annotations = !{!2, !3}
!0 = !{i32 7, !"openmp", i32 51}
!1 = !{i32 7, !"openmp-device", i32 51}
-!2 = !{ptr @openmp_kernel, !"kernel", i32 1}
-!3 = !{ptr @non_openmp_kernel, !"kernel", i32 1}
diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines.ll b/llvm/test/Transforms/OpenMP/custom_state_machines.ll
index e6ddf16f067634..10e521bbfcc10f 100644
--- a/llvm/test/Transforms/OpenMP/custom_state_machines.ll
+++ b/llvm/test/Transforms/OpenMP/custom_state_machines.ll
@@ -138,7 +138,7 @@
@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
-define weak void @__omp_offloading_14_a36502b_no_state_machine_needed_l14(ptr %dyn) #0 {
+define weak ptx_kernel void @__omp_offloading_14_a36502b_no_state_machine_needed_l14(ptr %dyn) #0 {
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
@@ -197,7 +197,7 @@ declare i32 @__kmpc_global_thread_num(ptr) #3
declare void @__kmpc_target_deinit()
-define weak void @__omp_offloading_14_a36502b_simple_state_machine_l22(ptr %dyn) #0 {
+define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_l22(ptr %dyn) #0 {
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
@@ -290,7 +290,7 @@ entry:
ret void
}
-define weak void @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39(ptr %dyn) #0 {
+define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39(ptr %dyn) #0 {
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
@@ -367,7 +367,7 @@ entry:
ret void
}
-define weak void @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55(ptr %dyn) #0 {
+define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55(ptr %dyn) #0 {
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
@@ -453,7 +453,7 @@ entry:
ret void
}
-define weak void @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66(ptr %dyn) #0 {
+define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66(ptr %dyn) #0 {
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
@@ -537,7 +537,7 @@ entry:
ret void
}
-define weak void @__omp_offloading_14_a36502b_simple_state_machine_pure_l77(ptr %dyn) #0 {
+define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_pure_l77(ptr %dyn) #0 {
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
@@ -624,7 +624,7 @@ entry:
ret void
}
-define weak void @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92(ptr %dyn) #0 {
+define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92(ptr %dyn) #0 {
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
@@ -679,7 +679,7 @@ return: ; preds = %if.end, %if.then
declare i32 @omp_get_thread_num(...) #4
-define weak void @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112(ptr %dyn) #0 {
+define weak ptx_kernel void @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112(ptr %dyn) #0 {
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
@@ -812,7 +812,6 @@ attributes #8 = { convergent "llvm.assume"="omp_no_openmp" }
attributes #9 = { convergent nounwind readonly willreturn }
!omp_offload.info = !{!0, !1, !2, !3, !4, !5, !6, !7}
-!nvvm.annotations = !{!8, !9, !10, !11, !12, !13, !14, !15}
!llvm.module.flags = !{!16, !17, !18}
!0 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
@@ -823,14 +822,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
!5 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
!6 = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
!7 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
-!8 = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1}
-!9 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1}
-!10 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1}
-!11 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1}
-!12 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1}
-!13 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1}
-!14 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1}
-!15 = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1}
!16 = !{i32 1, !"wchar_size", i32 4}
!17 = !{i32 7, !"openmp", i32 50}
!18 = !{i32 7, !"openmp-device", i32 50}
@@ -4107,17 +4098,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
; AMDGPU: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
; AMDGPU: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
-; AMDGPU: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1}
-; AMDGPU: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1}
-; AMDGPU: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1}
-; AMDGPU: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1}
-; AMDGPU: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1}
-; AMDGPU: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1}
-; AMDGPU: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1}
-; AMDGPU: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1}
-; AMDGPU: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; AMDGPU: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; AMDGPU: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; AMDGPU: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; AMDGPU: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; AMDGPU: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
;.
; NVPTX: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
; NVPTX: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
@@ -4127,17 +4110,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
; NVPTX: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
; NVPTX: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
-; NVPTX: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1}
-; NVPTX: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1}
-; NVPTX: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1}
-; NVPTX: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1}
-; NVPTX: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1}
-; NVPTX: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1}
-; NVPTX: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1}
-; NVPTX: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1}
-; NVPTX: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; NVPTX: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; NVPTX: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; NVPTX: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; NVPTX: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; NVPTX: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
;.
; AMDGPU-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
; AMDGPU-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
@@ -4147,17 +4122,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
; AMDGPU-DISABLED: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
; AMDGPU-DISABLED: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
-; AMDGPU-DISABLED: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; AMDGPU-DISABLED: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; AMDGPU-DISABLED: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; AMDGPU-DISABLED: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; AMDGPU-DISABLED: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; AMDGPU-DISABLED: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
;.
; NVPTX-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
; NVPTX-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
@@ -4167,15 +4134,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
; NVPTX-DISABLED: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
; NVPTX-DISABLED: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
-; NVPTX-DISABLED: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; NVPTX-DISABLED: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; NVPTX-DISABLED: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; NVPTX-DISABLED: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; NVPTX-DISABLED: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; NVPTX-DISABLED: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
;.
diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll b/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll
index d20821d450365b..9576ff6ca6aeee 100644
--- a/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll
+++ b/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll
@@ -139,7 +139,7 @@
@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
-define weak void @__omp_offloading_14_a36502b_no_state_machine_needed_l14(ptr %dyn) #0 {
+define weak ptx_kernel void @__omp_offloading_14_a36502b_no_state_machine_needed_l14(ptr %dyn) #0 {
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
@@ -196,7 +196,7 @@ declare i32 @__kmpc_global_thread_num(ptr) #3
declare void @__kmpc_target_deinit()
-define weak void @__omp_offloading_14_a36502b_simple_state_machine_l22(ptr %dyn) #0 {
+define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_l22(ptr %dyn) #0 {
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
@@ -289,7 +289,7 @@ entry:
ret void
}
-define weak void @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39(ptr %dyn) #0 {
+define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39(ptr %dyn) #0 {
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
@@ -366,7 +366,7 @@ entry:
ret void
}
-define weak void @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55(ptr %dyn) #0 {
+define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55(ptr %dyn) #0 {
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
@@ -452,7 +452,7 @@ entry:
ret void
}
-define weak void @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66(ptr %dyn) #0 {
+define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66(ptr %dyn) #0 {
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
@@ -536,7 +536,7 @@ entry:
ret void
}
-define weak void @__omp_offloading_14_a36502b_simple_state_machine_pure_l77(ptr %dyn) #0 {
+define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_pure_l77(ptr %dyn) #0 {
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
@@ -623,7 +623,7 @@ entry:
ret void
}
-define weak void @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92(ptr %dyn) #0 {
+define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92(ptr %dyn) #0 {
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
@@ -678,7 +678,7 @@ return: ; preds = %if.end, %if.then
declare i32 @omp_get_thread_num(...) #4
-define weak void @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112(ptr %dyn) #0 {
+define weak ptx_kernel void @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112(ptr %dyn) #0 {
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
@@ -811,7 +811,6 @@ attributes #8 = { convergent "llvm.assume"="omp_no_openmp" }
attributes #9 = { convergent nounwind readonly willreturn }
!omp_offload.info = !{!0, !1, !2, !3, !4, !5, !6, !7}
-!nvvm.annotations = !{!8, !9, !10, !11, !12, !13, !14, !15}
!llvm.module.flags = !{!16, !17, !18}
!0 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
@@ -822,14 +821,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
!5 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
!6 = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
!7 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
-!8 = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1}
-!9 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1}
-!10 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1}
-!11 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1}
-!12 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1}
-!13 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1}
-!14 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1}
-!15 = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1}
!16 = !{i32 1, !"wchar_size", i32 4}
!17 = !{i32 7, !"openmp", i32 50}
!18 = !{i32 7, !"openmp-device", i32 50}
@@ -4976,17 +4967,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
; AMDGPU1: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
; AMDGPU1: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
-; AMDGPU1: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1}
-; AMDGPU1: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1}
-; AMDGPU1: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1}
-; AMDGPU1: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1}
-; AMDGPU1: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1}
-; AMDGPU1: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1}
-; AMDGPU1: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1}
-; AMDGPU1: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1}
-; AMDGPU1: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; AMDGPU1: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; AMDGPU1: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; AMDGPU1: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; AMDGPU1: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; AMDGPU1: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
;.
; NVPTX1: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
; NVPTX1: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
@@ -4996,17 +4979,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
; NVPTX1: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
; NVPTX1: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
-; NVPTX1: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1}
-; NVPTX1: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1}
-; NVPTX1: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1}
-; NVPTX1: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1}
-; NVPTX1: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1}
-; NVPTX1: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1}
-; NVPTX1: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1}
-; NVPTX1: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1}
-; NVPTX1: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; NVPTX1: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; NVPTX1: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; NVPTX1: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; NVPTX1: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; NVPTX1: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
;.
; AMDGPU2: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
; AMDGPU2: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
@@ -5016,17 +4991,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
; AMDGPU2: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
; AMDGPU2: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
-; AMDGPU2: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1}
-; AMDGPU2: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1}
-; AMDGPU2: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1}
-; AMDGPU2: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1}
-; AMDGPU2: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1}
-; AMDGPU2: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1}
-; AMDGPU2: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1}
-; AMDGPU2: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1}
-; AMDGPU2: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; AMDGPU2: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; AMDGPU2: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; AMDGPU2: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; AMDGPU2: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; AMDGPU2: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
;.
; AMDGPU3: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
; AMDGPU3: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
@@ -5036,17 +5003,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
; AMDGPU3: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
; AMDGPU3: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
-; AMDGPU3: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1}
-; AMDGPU3: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1}
-; AMDGPU3: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1}
-; AMDGPU3: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1}
-; AMDGPU3: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1}
-; AMDGPU3: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1}
-; AMDGPU3: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1}
-; AMDGPU3: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1}
-; AMDGPU3: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; AMDGPU3: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; AMDGPU3: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; AMDGPU3: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; AMDGPU3: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; AMDGPU3: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
;.
; NVPTX2: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
; NVPTX2: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
@@ -5056,17 +5015,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
; NVPTX2: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
; NVPTX2: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
-; NVPTX2: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1}
-; NVPTX2: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1}
-; NVPTX2: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1}
-; NVPTX2: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1}
-; NVPTX2: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1}
-; NVPTX2: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1}
-; NVPTX2: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1}
-; NVPTX2: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1}
-; NVPTX2: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; NVPTX2: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; NVPTX2: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; NVPTX2: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; NVPTX2: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; NVPTX2: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
;.
; NVPTX3: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
; NVPTX3: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
@@ -5076,15 +5027,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
; NVPTX3: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
; NVPTX3: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
-; NVPTX3: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1}
-; NVPTX3: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1}
-; NVPTX3: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1}
-; NVPTX3: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1}
-; NVPTX3: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1}
-; NVPTX3: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1}
-; NVPTX3: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1}
-; NVPTX3: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1}
-; NVPTX3: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; NVPTX3: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; NVPTX3: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; NVPTX3: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; NVPTX3: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; NVPTX3: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
;.
diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll b/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll
index f7bfd306506944..ad41639511e994 100644
--- a/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll
+++ b/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll
@@ -59,7 +59,7 @@ target triple = "nvptx64"
; Function Attrs: convergent norecurse nounwind
-define weak void @__omp_offloading_2a_d80d3d_test_fallback_l11(ptr %dyn) local_unnamed_addr #0 !dbg !15 {
+define weak ptx_kernel void @__omp_offloading_2a_d80d3d_test_fallback_l11(ptr %dyn) local_unnamed_addr #0 !dbg !15 {
entry:
%captured_vars_addrs.i.i = alloca [0 x ptr], align 8
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_2a_d80d3d_test_fallback_l11_kernel_environment, ptr %dyn) #3, !dbg !18
@@ -104,7 +104,7 @@ declare i32 @__kmpc_global_thread_num(ptr) local_unnamed_addr #3
declare void @__kmpc_target_deinit() local_unnamed_addr
; Function Attrs: norecurse nounwind
-define weak void @__omp_offloading_2a_d80d3d_test_no_fallback_l20(ptr %dyn) local_unnamed_addr #4 !dbg !32 {
+define weak ptx_kernel void @__omp_offloading_2a_d80d3d_test_no_fallback_l20(ptr %dyn) local_unnamed_addr #4 !dbg !32 {
entry:
%captured_vars_addrs.i2.i = alloca [0 x ptr], align 8
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_2a_d80d3d_test_no_fallback_l20_kernel_environment, ptr %dyn) #3, !dbg !33
@@ -175,7 +175,6 @@ attributes #8 = { "llvm.assume"="omp_no_parallelism" }
!llvm.dbg.cu = !{!0}
!omp_offload.info = !{!3, !4}
-!nvvm.annotations = !{!5, !6}
!llvm.module.flags = !{!7, !8, !9, !10, !11, !12, !13}
!llvm.ident = !{!14}
@@ -184,8 +183,6 @@ attributes #8 = { "llvm.assume"="omp_no_parallelism" }
!2 = !{}
!3 = !{i32 0, i32 42, i32 14159165, !"test_no_fallback", i32 20, i32 1}
!4 = !{i32 0, i32 42, i32 14159165, !"test_fallback", i32 11, i32 0}
-!5 = !{ptr @__omp_offloading_2a_d80d3d_test_fallback_l11, !"kernel", i32 1}
-!6 = !{ptr @__omp_offloading_2a_d80d3d_test_no_fallback_l20, !"kernel", i32 1}
!7 = !{i32 7, !"Dwarf Version", i32 2}
!8 = !{i32 2, !"Debug Info Version", i32 3}
!9 = !{i32 1, !"wchar_size", i32 4}
diff --git a/llvm/test/Transforms/OpenMP/deduplication_target.ll b/llvm/test/Transforms/OpenMP/deduplication_target.ll
index 6b0563365c6480..7027c3275b9329 100644
--- a/llvm/test/Transforms/OpenMP/deduplication_target.ll
+++ b/llvm/test/Transforms/OpenMP/deduplication_target.ll
@@ -15,7 +15,7 @@ target triple = "nvptx64"
declare void @use(i32)
-define weak void @__omp_offloading_50_a3e09bf8_foo_l2(ptr %dyn) #0 {
+define weak ptx_kernel void @__omp_offloading_50_a3e09bf8_foo_l2(ptr %dyn) #0 {
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_50_a3e09bf8_foo_l2
; CHECK-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
@@ -56,11 +56,9 @@ attributes #0 = { convergent noinline norecurse nounwind "kernel" "frame-pointer
attributes #1 = { nounwind }
!omp_offload.info = !{!0}
-!nvvm.annotations = !{!1}
!llvm.module.flags = !{!2, !3, !4}
!0 = !{i32 0, i32 80, i32 -1545561096, !"foo", i32 2, i32 0}
-!1 = !{ptr @__omp_offloading_50_a3e09bf8_foo_l2, !"kernel", i32 1}
!2 = !{i32 1, !"wchar_size", i32 4}
!3 = !{i32 7, !"openmp", i32 50}
!4 = !{i32 7, !"openmp-device", i32 50}
diff --git a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll
index 6102201ad4bac7..6a4519a161fd6f 100644
--- a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll
+++ b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll
@@ -19,7 +19,7 @@ target triple = "nvptx64"
; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
;.
-define weak void @kernel0(ptr %dyn) "kernel" #0 {
+define weak ptx_kernel void @kernel0(ptr %dyn) "kernel" #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel0
; CHECK-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @kernel0_kernel_environment, ptr [[DYN]])
@@ -43,7 +43,7 @@ define weak void @kernel0(ptr %dyn) "kernel" #0 {
ret void
}
-define weak void @kernel1(ptr %dyn) "kernel" #0 {
+define weak ptx_kernel void @kernel1(ptr %dyn) "kernel" #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel1
; CHECK-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @kernel1_kernel_environment, ptr [[DYN]])
@@ -63,7 +63,7 @@ define weak void @kernel1(ptr %dyn) "kernel" #0 {
ret void
}
-define weak void @kernel2(ptr %dyn) "kernel" #0 {
+define weak ptx_kernel void @kernel2(ptr %dyn) "kernel" #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel2
; CHECK-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
@@ -200,15 +200,11 @@ declare i32 @__kmpc_global_thread_num(ptr)
!llvm.module.flags = !{!0, !1}
-!nvvm.annotations = !{!2, !3, !4}
attributes #0 = { "omp_target_thread_limit"="666" "omp_target_num_teams"="777"}
!0 = !{i32 7, !"openmp", i32 50}
!1 = !{i32 7, !"openmp-device", i32 50}
-!2 = !{ptr @kernel0, !"kernel", i32 1}
-!3 = !{ptr @kernel1, !"kernel", i32 1}
-!4 = !{ptr @kernel2, !"kernel", i32 1}
;.
; CHECK: attributes #[[ATTR0]] = { "kernel" "omp_target_num_teams"="777" "omp_target_thread_limit"="666" }
; CHECK: attributes #[[ATTR1]] = { nounwind }
@@ -217,7 +213,4 @@ attributes #0 = { "omp_target_thread_limit"="666" "omp_target_num_teams"="777"}
;.
; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK: [[META2:![0-9]+]] = !{ptr @kernel0, !"kernel", i32 1}
-; CHECK: [[META3:![0-9]+]] = !{ptr @kernel1, !"kernel", i32 1}
-; CHECK: [[META4:![0-9]+]] = !{ptr @kernel2, !"kernel", i32 1}
;.
diff --git a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll
index 0cf6e7488b4dd4..3037d24b8c4484 100644
--- a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll
+++ b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll
@@ -7,7 +7,7 @@ target triple = "nvptx64"
;.
; CHECK: @G = external global i32
;.
-define weak void @kernel0() #0 {
+define weak ptx_kernel void @kernel0() #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel0
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, ptr null)
@@ -25,7 +25,7 @@ define weak void @kernel0() #0 {
ret void
}
-define weak void @kernel1() #0 {
+define weak ptx_kernel void @kernel1() #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel1
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, ptr null)
@@ -39,7 +39,7 @@ define weak void @kernel1() #0 {
ret void
}
-define weak void @kernel2() #0 {
+define weak ptx_kernel void @kernel2() #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel2
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, ptr null)
@@ -107,15 +107,11 @@ declare void @__kmpc_target_deinit() #1
!llvm.module.flags = !{!0, !1}
-!nvvm.annotations = !{!2, !3, !4}
attributes #0 = { optnone noinline "kernel" "omp_target_thread_limit"="666" "omp_target_num_teams"="777"}
!0 = !{i32 7, !"openmp", i32 50}
!1 = !{i32 7, !"openmp-device", i32 50}
-!2 = !{ptr @kernel0, !"kernel", i32 1}
-!3 = !{ptr @kernel1, !"kernel", i32 1}
-!4 = !{ptr @kernel2, !"kernel", i32 1}
;
;.
; CHECK: attributes #[[ATTR0]] = { noinline optnone "kernel" "omp_target_num_teams"="777" "omp_target_thread_limit"="666" }
@@ -123,7 +119,4 @@ attributes #0 = { optnone noinline "kernel" "omp_target_thread_limit"="666" "omp
;.
; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK: [[META2:![0-9]+]] = !{ptr @kernel0, !"kernel", i32 1}
-; CHECK: [[META3:![0-9]+]] = !{ptr @kernel1, !"kernel", i32 1}
-; CHECK: [[META4:![0-9]+]] = !{ptr @kernel2, !"kernel", i32 1}
;.
diff --git a/llvm/test/Transforms/OpenMP/global_constructor.ll b/llvm/test/Transforms/OpenMP/global_constructor.ll
index 804b910dcd3088..1d18e527e14667 100644
--- a/llvm/test/Transforms/OpenMP/global_constructor.ll
+++ b/llvm/test/Transforms/OpenMP/global_constructor.ll
@@ -10,7 +10,7 @@
@_ZL6Device = internal global double 0.000000e+00, align 8
@__omp_offloading_fd02_85283c04_main_l11_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
-define weak void @__omp_offloading_fd02_85283c04_main_l11(ptr %dyn, ptr nonnull align 8 dereferenceable(8) %X) local_unnamed_addr "kernel" {
+define weak ptx_kernel void @__omp_offloading_fd02_85283c04_main_l11(ptr %dyn, ptr nonnull align 8 dereferenceable(8) %X) local_unnamed_addr "kernel" {
entry:
%0 = tail call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_85283c04_main_l11_kernel_environment, ptr %dyn) #0
%exec_user_code = icmp eq i32 %0, -1
@@ -39,7 +39,7 @@ declare i32 @__kmpc_target_init(ptr, ptr) local_unnamed_addr
declare void @__kmpc_target_deinit() local_unnamed_addr
-define weak void @__omp_offloading__fd02_85283c04_Device_l6_ctor() "kernel" {
+define weak ptx_kernel void @__omp_offloading__fd02_85283c04_Device_l6_ctor() "kernel" {
entry:
%call.i = tail call double @__nv_log(double noundef 2.000000e+00) #1
%call.i2 = tail call double @__nv_log(double noundef 2.000000e+00) #1
@@ -58,15 +58,12 @@ attributes #0 = { nounwind }
attributes #1 = { convergent nounwind }
!omp_offload.info = !{!0, !1, !2}
-!nvvm.annotations = !{!3, !4}
!llvm.module.flags = !{!5, !6, !7, !8, !9}
!llvm.ident = !{!10}
!0 = !{i32 0, i32 64770, i32 -2060960764, !"__omp_offloading__fd02_85283c04_Device_l6_ctor", i32 6, i32 1}
!1 = !{i32 0, i32 64770, i32 -2060960764, !"main", i32 11, i32 2}
!2 = !{i32 1, !"_ZL6Device", i32 0, i32 0}
-!3 = !{ptr @__omp_offloading__fd02_85283c04_Device_l6_ctor, !"kernel", i32 1}
-!4 = !{ptr @__omp_offloading_fd02_85283c04_main_l11, !"kernel", i32 1}
!5 = !{i32 1, !"wchar_size", i32 4}
!6 = !{i32 7, !"openmp", i32 50}
!7 = !{i32 7, !"openmp-device", i32 50}
@@ -86,12 +83,12 @@ attributes #1 = { convergent nounwind }
; CHECK: common.ret:
; CHECK-NEXT: ret void
; CHECK: user_code.entry:
-; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr @_ZL6Device, align 8, !tbaa [[TBAA11:![0-9]+]]
+; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr @_ZL6Device, align 8, !tbaa [[TBAA9:![0-9]+]]
; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @__kmpc_get_hardware_thread_id_in_block() #[[ATTR1]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
; CHECK: region.guarded:
-; CHECK-NEXT: store double [[TMP1]], ptr [[X]], align 8, !tbaa [[TBAA11]]
+; CHECK-NEXT: store double [[TMP1]], ptr [[X]], align 8, !tbaa [[TBAA9]]
; CHECK-NEXT: br label [[REGION_BARRIER]]
; CHECK: region.barrier:
; CHECK-NEXT: tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @[[GLOB1:[0-9]+]], i32 [[TMP2]]) #[[ATTR1]]
@@ -105,6 +102,6 @@ attributes #1 = { convergent nounwind }
; CHECK-NEXT: [[CALL_I:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR2:[0-9]+]]
; CHECK-NEXT: [[CALL_I2:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR2]]
; CHECK-NEXT: [[DIV:%.*]] = fdiv double [[CALL_I]], [[CALL_I2]]
-; CHECK-NEXT: store double [[DIV]], ptr @_ZL6Device, align 8, !tbaa [[TBAA11]]
+; CHECK-NEXT: store double [[DIV]], ptr @_ZL6Device, align 8, !tbaa [[TBAA9]]
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Transforms/OpenMP/globalization_remarks.ll b/llvm/test/Transforms/OpenMP/globalization_remarks.ll
index 878ac9010a7dc3..0f37b3e070acd3 100644
--- a/llvm/test/Transforms/OpenMP/globalization_remarks.ll
+++ b/llvm/test/Transforms/OpenMP/globalization_remarks.ll
@@ -13,7 +13,7 @@ target triple = "nvptx64"
@S = external local_unnamed_addr global ptr
@foo_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr null, ptr null }
-define void @foo() "kernel" {
+define ptx_kernel void @foo() "kernel" {
entry:
%c = call i32 @__kmpc_target_init(ptr @foo_kernel_environment, ptr null)
%0 = call ptr @__kmpc_alloc_shared(i64 4), !dbg !10
@@ -39,7 +39,6 @@ declare void @__kmpc_target_deinit()
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5, !6}
-!nvvm.annotations = !{!7, !8}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
!1 = !DIFile(filename: "globalization_remarks.c", directory: "/tmp/globalization_remarks.c")
@@ -48,7 +47,6 @@ declare void @__kmpc_target_deinit()
!4 = !{i32 1, !"wchar_size", i32 4}
!5 = !{i32 7, !"openmp", i32 50}
!6 = !{i32 7, !"openmp-device", i32 50}
-!7 = !{ptr @foo, !"kernel", i32 1}
!8 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !9, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
!9 = !DISubroutineType(types: !2)
!10 = !DILocation(line: 5, column: 7, scope: !8)
diff --git a/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll b/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll
index b029efbbe3c687..ce17ffcbb20846 100644
--- a/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll
+++ b/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll
@@ -3,11 +3,11 @@
; CHECK-DAG: remark: <unknown>:0:0: OpenMP GPU kernel kernel1
; CHECK-DAG: remark: <unknown>:0:0: OpenMP GPU kernel kernel2
-define void @kernel1() "kernel" {
+define ptx_kernel void @kernel1() "kernel" {
ret void
}
-define void @kernel2() "kernel" {
+define ptx_kernel void @kernel2() "kernel" {
ret void
}
@@ -19,10 +19,5 @@ define void @non_kernel() {
declare dso_local void @__kmpc_kernel_prepare_parallel(ptr)
!llvm.module.flags = !{!4}
-!nvvm.annotations = !{!2, !0, !1, !3, !1, !2}
-!0 = !{ptr @kernel1, !"kernel", i32 1}
-!1 = !{ptr @non_kernel, !"non_kernel", i32 1}
-!2 = !{null, !"align", i32 1}
-!3 = !{ptr @kernel2, !"kernel", i32 1}
!4 = !{i32 7, !"openmp", i32 50}
diff --git a/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll b/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll
index 936f7d1c46781a..760c5a354a37ce 100644
--- a/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll
+++ b/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll
@@ -44,7 +44,7 @@
@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @0 }, align 8
@__omp_offloading_10301_87b2c_foo_l7_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
-define weak void @__omp_offloading_10301_87b2c_foo_l7() "kernel" {
+define weak ptx_kernel void @__omp_offloading_10301_87b2c_foo_l7() "kernel" {
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
@@ -173,10 +173,8 @@ entry:
}
!omp_offload.info = !{!0}
-!nvvm.annotations = !{!1}
!llvm.module.flags = !{!2, !3}
!0 = !{i32 0, i32 66305, i32 555956, !"foo", i32 7, i32 0}
-!1 = !{ptr @__omp_offloading_10301_87b2c_foo_l7, !"kernel", i32 1}
!2 = !{i32 7, !"openmp", i32 50}
!3 = !{i32 7, !"openmp-device", i32 50}
diff --git a/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll b/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll
index 310ac0a8296c33..2b3a7fabfb4595 100644
--- a/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll
+++ b/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll
@@ -18,7 +18,7 @@ target triple = "nvptx64"
; CHECK: @none_spmd_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr null, ptr null }
; CHECK: @will_not_be_spmd_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr null, ptr null }
;.
-define weak void @is_spmd() "kernel" {
+define weak ptx_kernel void @is_spmd() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@is_spmd
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @is_spmd_kernel_environment, ptr null)
@@ -36,7 +36,7 @@ define weak void @is_spmd() "kernel" {
ret void
}
-define weak void @will_be_spmd() "kernel" {
+define weak ptx_kernel void @will_be_spmd() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@will_be_spmd
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: entry:
@@ -70,7 +70,7 @@ user_code.entry:
ret void
}
-define weak void @non_spmd() "kernel" {
+define weak ptx_kernel void @non_spmd() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@non_spmd
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @none_spmd_kernel_environment, ptr null)
@@ -88,7 +88,7 @@ define weak void @non_spmd() "kernel" {
ret void
}
-define weak void @will_not_be_spmd() "kernel" {
+define weak ptx_kernel void @will_not_be_spmd() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@will_not_be_spmd
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @will_not_be_spmd_kernel_environment, ptr null)
@@ -207,14 +207,9 @@ declare void @foo()
declare void @bar()
!llvm.module.flags = !{!0, !1}
-!nvvm.annotations = !{!2, !3, !4, !5}
!0 = !{i32 7, !"openmp", i32 50}
!1 = !{i32 7, !"openmp-device", i32 50}
-!2 = !{ptr @is_spmd, !"kernel", i32 1}
-!3 = !{ptr @will_be_spmd, !"kernel", i32 1}
-!4 = !{ptr @non_spmd, !"kernel", i32 1}
-!5 = !{ptr @will_not_be_spmd, !"kernel", i32 1}
;.
; CHECK: attributes #[[ATTR0]] = { "kernel" }
; CHECK: attributes #[[ATTR1:[0-9]+]] = { "llvm.assume"="ompx_spmd_amenable" }
@@ -223,8 +218,4 @@ declare void @bar()
;.
; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK: [[META2:![0-9]+]] = !{ptr @is_spmd, !"kernel", i32 1}
-; CHECK: [[META3:![0-9]+]] = !{ptr @will_be_spmd, !"kernel", i32 1}
-; CHECK: [[META4:![0-9]+]] = !{ptr @non_spmd, !"kernel", i32 1}
-; CHECK: [[META5:![0-9]+]] = !{ptr @will_not_be_spmd, !"kernel", i32 1}
;.
diff --git a/llvm/test/Transforms/OpenMP/nested_parallelism.ll b/llvm/test/Transforms/OpenMP/nested_parallelism.ll
index 5c4386b24a3d5a..1679a27fdae8bc 100644
--- a/llvm/test/Transforms/OpenMP/nested_parallelism.ll
+++ b/llvm/test/Transforms/OpenMP/nested_parallelism.ll
@@ -43,7 +43,7 @@ target triple = "nvptx64"
; CHECK: @__omp_offloading_10302_bd7e0_main_l13_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; CHECK: @__omp_offloading_10302_bd7e0_main_l16_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
;.
-define weak_odr protected void @__omp_offloading_10302_bd7e0_main_l13(ptr %dyn, i64 noundef %i) local_unnamed_addr "kernel" {
+define weak_odr protected ptx_kernel void @__omp_offloading_10302_bd7e0_main_l13(ptr %dyn, i64 noundef %i) local_unnamed_addr "kernel" {
; CHECK-LABEL: @__omp_offloading_10302_bd7e0_main_l13(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS_I:%.*]] = alloca [1 x ptr], align 8
@@ -127,7 +127,7 @@ entry:
declare void @__kmpc_target_deinit(ptr, i8) local_unnamed_addr
-define weak_odr protected void @__omp_offloading_10302_bd7e0_main_l16(ptr %dyn, i64 noundef %i) local_unnamed_addr "kernel" {
+define weak_odr protected ptx_kernel void @__omp_offloading_10302_bd7e0_main_l16(ptr %dyn, i64 noundef %i) local_unnamed_addr "kernel" {
; CHECK-LABEL: @__omp_offloading_10302_bd7e0_main_l16(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS_I:%.*]] = alloca [1 x ptr], align 8
@@ -315,13 +315,10 @@ declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #11
!omp_offload.info = !{!0, !1}
-!nvvm.annotations = !{!2, !3}
!llvm.module.flags = !{!4, !5}
!0 = !{i32 0, i32 66306, i32 776160, !"main", i32 13, i32 0, i32 0}
!1 = !{i32 0, i32 66306, i32 776160, !"main", i32 16, i32 0, i32 1}
-!2 = !{ptr @__omp_offloading_10302_bd7e0_main_l13, !"kernel", i32 1}
-!3 = !{ptr @__omp_offloading_10302_bd7e0_main_l16, !"kernel", i32 1}
!4 = !{i32 7, !"openmp", i32 50}
!5 = !{i32 7, !"openmp-device", i32 50}
@@ -336,8 +333,6 @@ declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #11
;.
; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 66306, i32 776160, !"main", i32 13, i32 0, i32 0}
; CHECK: [[META1:![0-9]+]] = !{i32 0, i32 66306, i32 776160, !"main", i32 16, i32 0, i32 1}
-; CHECK: [[META2:![0-9]+]] = !{ptr @__omp_offloading_10302_bd7e0_main_l13, !"kernel", i32 1}
-; CHECK: [[META3:![0-9]+]] = !{ptr @__omp_offloading_10302_bd7e0_main_l16, !"kernel", i32 1}
-; CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; CHECK: [[META5:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; CHECK: [[META2:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
;.
diff --git a/llvm/test/Transforms/OpenMP/parallel_level_fold.ll b/llvm/test/Transforms/OpenMP/parallel_level_fold.ll
index fd6e7683af8e3e..e5f65b26ed223c 100644
--- a/llvm/test/Transforms/OpenMP/parallel_level_fold.ll
+++ b/llvm/test/Transforms/OpenMP/parallel_level_fold.ll
@@ -16,7 +16,7 @@ target triple = "nvptx64"
; CHECK: @spmd_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr null, ptr null }
; CHECK: @parallel_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr null, ptr null }
;.
-define weak void @none_spmd() "kernel" {
+define weak ptx_kernel void @none_spmd() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@none_spmd
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @none_spmd_kernel_environment, ptr null)
@@ -32,7 +32,7 @@ define weak void @none_spmd() "kernel" {
ret void
}
-define weak void @spmd() "kernel" {
+define weak ptx_kernel void @spmd() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@spmd
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @spmd_kernel_environment, ptr null)
@@ -48,7 +48,7 @@ define weak void @spmd() "kernel" {
ret void
}
-define weak void @parallel() "kernel" {
+define weak ptx_kernel void @parallel() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@parallel
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @parallel_kernel_environment, ptr null)
@@ -136,20 +136,13 @@ declare i32 @__kmpc_target_init(ptr, ptr) #1
declare void @__kmpc_target_deinit() #1
!llvm.module.flags = !{!0, !1}
-!nvvm.annotations = !{!2, !3, !4}
!0 = !{i32 7, !"openmp", i32 50}
!1 = !{i32 7, !"openmp-device", i32 50}
-!2 = !{ptr @none_spmd, !"kernel", i32 1}
-!3 = !{ptr @spmd, !"kernel", i32 1}
-!4 = !{ptr @parallel, !"kernel", i32 1}
;.
; CHECK: attributes #[[ATTR0]] = { "kernel" }
; CHECK: attributes #[[ATTR1]] = { alwaysinline }
;.
; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK: [[META2:![0-9]+]] = !{ptr @none_spmd, !"kernel", i32 1}
-; CHECK: [[META3:![0-9]+]] = !{ptr @spmd, !"kernel", i32 1}
-; CHECK: [[META4:![0-9]+]] = !{ptr @parallel, !"kernel", i32 1}
;.
diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll
index 31e3ef2b9079f6..29f2030c4d42b5 100644
--- a/llvm/test/Transforms/OpenMP/remove_globalization.ll
+++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll
@@ -40,7 +40,7 @@ define weak i32 @__kmpc_target_init(ptr %0, ptr) {
}
declare void @__kmpc_target_deinit()
-define void @kernel(ptr %dyn) "kernel" {
+define ptx_kernel void @kernel(ptr %dyn) "kernel" {
; CHECK-LABEL: define {{[^@]+}}@kernel
; CHECK-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
@@ -98,14 +98,14 @@ define internal void @bar() {
; CHECK-SAME: () #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 4
-; CHECK-NEXT: call void @share(ptr nofree [[DOTH2S]]) #[[ATTR5:[0-9]+]], !dbg [[DBG8:![0-9]+]]
+; CHECK-NEXT: call void @share(ptr nofree [[DOTH2S]]) #[[ATTR5:[0-9]+]], !dbg [[DBG7:![0-9]+]]
; CHECK-NEXT: ret void
;
; CHECK-DISABLED-LABEL: define {{[^@]+}}@bar
; CHECK-DISABLED-SAME: () #[[ATTR1]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 4
-; CHECK-DISABLED-NEXT: call void @share(ptr nofree [[DOTH2S]]) #[[ATTR5:[0-9]+]], !dbg [[DBG8:![0-9]+]]
+; CHECK-DISABLED-NEXT: call void @share(ptr nofree [[DOTH2S]]) #[[ATTR5:[0-9]+]], !dbg [[DBG7:![0-9]+]]
; CHECK-DISABLED-NEXT: ret void
;
entry:
@@ -146,7 +146,7 @@ define void @unused() {
;
; CHECK-DISABLED-LABEL: define {{[^@]+}}@unused() {
; CHECK-DISABLED-NEXT: entry:
-; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) #[[ATTR6:[0-9]+]], !dbg [[DBG11:![0-9]+]]
+; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) #[[ATTR6:[0-9]+]], !dbg [[DBG10:![0-9]+]]
; CHECK-DISABLED-NEXT: call void @__kmpc_free_shared(ptr [[TMP0]], i64 4) #[[ATTR6]]
; CHECK-DISABLED-NEXT: ret void
;
@@ -234,14 +234,12 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !6, !7}
-!nvvm.annotations = !{!5}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 13.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
!1 = !DIFile(filename: "remove_globalization.c", directory: "/tmp/remove_globalization.c")
!2 = !{}
!3 = !{i32 2, !"Debug Info Version", i32 3}
!4 = !{i32 1, !"wchar_size", i32 4}
-!5 = !{ptr @kernel, !"kernel", i32 1}
!6 = !{i32 7, !"openmp", i32 50}
!7 = !{i32 7, !"openmp-device", i32 50}
!8 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !11, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
@@ -276,10 +274,9 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
; CHECK: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; CHECK: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CHECK: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK: [[META7:![0-9]+]] = !{ptr @kernel, !"kernel", i32 1}
-; CHECK: [[DBG8]] = !DILocation(line: 4, column: 2, scope: [[META9:![0-9]+]])
-; CHECK: [[META9]] = distinct !DISubprogram(name: "bar", scope: [[META1]], file: [[META1]], line: 1, type: [[META10:![0-9]+]], scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META2]])
-; CHECK: [[META10]] = !DISubroutineType(types: [[META2]])
+; CHECK: [[DBG7]] = !DILocation(line: 4, column: 2, scope: [[META8:![0-9]+]])
+; CHECK: [[META8]] = distinct !DISubprogram(name: "bar", scope: [[META1]], file: [[META1]], line: 1, type: [[META9:![0-9]+]], scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META2]])
+; CHECK: [[META9]] = !DISubroutineType(types: [[META2]])
;.
; CHECK-DISABLED: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: [[META1:![0-9]+]], producer: "{{.*}}clang version {{.*}}", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: [[META2:![0-9]+]], splitDebugInlining: false, nameTableKind: None)
; CHECK-DISABLED: [[META1]] = !DIFile(filename: "remove_globalization.c", directory: {{.*}})
@@ -288,11 +285,10 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
; CHECK-DISABLED: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; CHECK-DISABLED: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CHECK-DISABLED: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK-DISABLED: [[META7:![0-9]+]] = !{ptr @kernel, !"kernel", i32 1}
-; CHECK-DISABLED: [[DBG8]] = !DILocation(line: 4, column: 2, scope: [[META9:![0-9]+]])
-; CHECK-DISABLED: [[META9]] = distinct !DISubprogram(name: "bar", scope: [[META1]], file: [[META1]], line: 1, type: [[META10:![0-9]+]], scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META2]])
-; CHECK-DISABLED: [[META10]] = !DISubroutineType(types: [[META2]])
-; CHECK-DISABLED: [[DBG11]] = !DILocation(line: 6, column: 2, scope: [[META9]])
+; CHECK-DISABLED: [[DBG7]] = !DILocation(line: 4, column: 2, scope: [[META8:![0-9]+]])
+; CHECK-DISABLED: [[META8]] = distinct !DISubprogram(name: "bar", scope: [[META1]], file: [[META1]], line: 1, type: [[META9:![0-9]+]], scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META2]])
+; CHECK-DISABLED: [[META9]] = !DISubroutineType(types: [[META2]])
+; CHECK-DISABLED: [[DBG10]] = !DILocation(line: 6, column: 2, scope: [[META8]])
;.
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK-REMARKS: {{.*}}
diff --git a/llvm/test/Transforms/OpenMP/replace_globalization.ll b/llvm/test/Transforms/OpenMP/replace_globalization.ll
index 6e4fb9e57388ba..92cfd750492269 100644
--- a/llvm/test/Transforms/OpenMP/replace_globalization.ll
+++ b/llvm/test/Transforms/OpenMP/replace_globalization.ll
@@ -25,7 +25,7 @@ target triple = "nvptx64"
@baz_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
-define dso_local void @foo(ptr %dyn) "kernel" {
+define dso_local ptx_kernel void @foo(ptr %dyn) "kernel" {
entry:
%c = call i32 @__kmpc_target_init(ptr @foo_kernel_environment, ptr %dyn)
%x = call align 4 ptr @__kmpc_alloc_shared(i64 4)
@@ -36,7 +36,7 @@ entry:
ret void
}
-define void @bar(ptr %dyn) "kernel" {
+define ptx_kernel void @bar(ptr %dyn) "kernel" {
%c = call i32 @__kmpc_target_init(ptr @bar_kernel_environment, ptr %dyn)
call void @unknown_no_openmp()
%cmp = icmp eq i32 %c, -1
@@ -60,7 +60,7 @@ exit:
ret void
}
-define void @baz_spmd(ptr %dyn) "kernel" {
+define ptx_kernel void @baz_spmd(ptr %dyn) "kernel" {
%c = call i32 @__kmpc_target_init(ptr @baz_kernel_environment, ptr %dyn)
call void @unknown_no_openmp()
%c0 = icmp eq i32 %c, -1
@@ -109,7 +109,6 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5, !6}
-!nvvm.annotations = !{!7, !8, !13}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
!1 = !DIFile(filename: "replace_globalization.c", directory: "/tmp/replace_globalization.c")
@@ -118,9 +117,6 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
!4 = !{i32 1, !"wchar_size", i32 4}
!5 = !{i32 7, !"openmp", i32 50}
!6 = !{i32 7, !"openmp-device", i32 50}
-!7 = !{ptr @foo, !"kernel", i32 1}
-!8 = !{ptr @bar, !"kernel", i32 1}
-!13 = !{ptr @baz_spmd, !"kernel", i32 1}
!9 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !10, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
!10 = !DISubroutineType(types: !2)
!11 = !DILocation(line: 5, column: 7, scope: !9)
@@ -177,7 +173,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
; CHECK-NEXT: [[C0:%.*]] = icmp eq i32 [[C]], -1
; CHECK-NEXT: br i1 [[C0]], label [[MASTER3:%.*]], label [[EXIT:%.*]]
; CHECK: master3:
-; CHECK-NEXT: [[Z:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 24) #[[ATTR6]], !dbg [[DBG10:![0-9]+]]
+; CHECK-NEXT: [[Z:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 24) #[[ATTR6]], !dbg [[DBG7:![0-9]+]]
; CHECK-NEXT: call void @use.internalized(ptr nofree [[Z]]) #[[ATTR7]]
; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[Z]], i64 24) #[[ATTR8]]
; CHECK-NEXT: br label [[EXIT]]
@@ -231,12 +227,9 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
; CHECK: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; CHECK: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CHECK: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK: [[META7:![0-9]+]] = !{ptr @foo, !"kernel", i32 1}
-; CHECK: [[META8:![0-9]+]] = !{ptr @bar, !"kernel", i32 1}
-; CHECK: [[META9:![0-9]+]] = !{ptr @baz_spmd, !"kernel", i32 1}
-; CHECK: [[DBG10]] = !DILocation(line: 5, column: 14, scope: [[META11:![0-9]+]])
-; CHECK: [[META11]] = distinct !DISubprogram(name: "bar", scope: [[META1]], file: [[META1]], line: 1, type: [[META12:![0-9]+]], scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META2]])
-; CHECK: [[META12]] = !DISubroutineType(types: [[META2]])
+; CHECK: [[DBG7]] = !DILocation(line: 5, column: 14, scope: [[META8:![0-9]+]])
+; CHECK: [[META8]] = distinct !DISubprogram(name: "bar", scope: [[META1]], file: [[META1]], line: 1, type: [[META9:![0-9]+]], scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META2]])
+; CHECK: [[META9]] = !DISubroutineType(types: [[META2]])
;.
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK-LIMIT: {{.*}}
diff --git a/llvm/test/Transforms/OpenMP/single_threaded_execution.ll b/llvm/test/Transforms/OpenMP/single_threaded_execution.ll
index c186e5f04f092d..70b9ce41c1a430 100644
--- a/llvm/test/Transforms/OpenMP/single_threaded_execution.ll
+++ b/llvm/test/Transforms/OpenMP/single_threaded_execution.ll
@@ -16,7 +16,7 @@
; CHECK: [openmp-opt] Basic block @kernel if.then is executed by a single thread.
; CHECK-NOT: [openmp-opt] Basic block @kernel if.else is executed by a single thread.
; CHECK-NOT: [openmp-opt] Basic block @kernel if.end is executed by a single thread.
-define void @kernel(ptr %dyn) "kernel" {
+define ptx_kernel void @kernel(ptr %dyn) "kernel" {
%call = call i32 @__kmpc_target_init(ptr @kernel_kernel_environment, ptr %dyn)
%cmp = icmp eq i32 %call, -1
br i1 %cmp, label %if.then, label %if.else
@@ -116,7 +116,6 @@ attributes #0 = { cold noinline }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5, !6}
-!nvvm.annotations = !{!7}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
!1 = !DIFile(filename: "single_threaded_execution.c", directory: "/tmp/single_threaded_execution.c")
@@ -125,7 +124,6 @@ attributes #0 = { cold noinline }
!4 = !{i32 1, !"wchar_size", i32 4}
!5 = !{i32 7, !"openmp", i32 50}
!6 = !{i32 7, !"openmp-device", i32 50}
-!7 = !{ptr @kernel, !"kernel", i32 1}
!8 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 8, type: !10, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
!9 = distinct !DISubprogram(name: "cold", scope: !1, file: !1, line: 8, type: !10, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
!10 = !DISubroutineType(types: !2)
diff --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll
index 6ff4b96b57556c..983175382f0f0f 100644
--- a/llvm/test/Transforms/OpenMP/spmdization.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization.ll
@@ -105,36 +105,6 @@
@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
-; AMDGPU-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
-; AMDGPU-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
-; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU-DISABLED: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4
-; AMDGPU-DISABLED: @[[X_SHARED_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4
-; AMDGPU-DISABLED: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; AMDGPU-DISABLED: @[[__OMP_OUTLINED__3_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; AMDGPU-DISABLED: @[[__OMP_OUTLINED__5_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; AMDGPU-DISABLED: @[[__OMP_OUTLINED__7_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; AMDGPU-DISABLED: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; NVPTX-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
-; NVPTX-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
-; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null }
-; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null }
-; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null }
-; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null }
-; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX-DISABLED: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4
-; NVPTX-DISABLED: @[[X_SHARED1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4
-; NVPTX-DISABLED: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; NVPTX-DISABLED: @[[__OMP_OUTLINED__3_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; NVPTX-DISABLED: @[[__OMP_OUTLINED__5_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; NVPTX-DISABLED: @[[__OMP_OUTLINED__7_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; NVPTX-DISABLED: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
;.
; AMDGPU: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; AMDGPU: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
@@ -226,7 +196,7 @@
; NVPTX-DISABLED2: @__omp_outlined__7_wrapper.ID = private constant i8 undef
; NVPTX-DISABLED2: @__omp_outlined__9_wrapper.ID = private constant i8 undef
;.
-define weak void @__omp_offloading_fd02_2044372e_sequential_loop_l5() #0 {
+define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_l5() #0 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
; AMDGPU-SAME: () #[[ATTR0:[0-9]+]] {
; AMDGPU-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
@@ -256,15 +226,6 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_l5() #0 {
; NVPTX-DISABLED2-SAME: () #[[ATTR0:[0-9]+]] {
; NVPTX-DISABLED2-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; NVPTX-DISABLED2-NEXT: ret void
-;
-; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
-; AMDGPU-DISABLED-SAME: () #[[ATTR0:[0-9]+]] {
-; AMDGPU-DISABLED-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
-; AMDGPU-DISABLED-NEXT: ret void
-; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
-; NVPTX-DISABLED-SAME: () #[[ATTR0:[0-9]+]] {
-; NVPTX-DISABLED-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
-; NVPTX-DISABLED-NEXT: ret void
call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
ret void
}
@@ -282,7 +243,7 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; AMDGPU-NEXT: ret void
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
; AMDGPU-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: br label [[COMMON_RET]]
@@ -299,7 +260,7 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; NVPTX-NEXT: ret void
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
+; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
; NVPTX-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: br label [[COMMON_RET]]
@@ -350,7 +311,7 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: user_code.entry:
; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
+; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
@@ -401,7 +362,7 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: user_code.entry:
; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
+; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
@@ -451,7 +412,7 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: user_code.entry:
; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
+; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
@@ -501,114 +462,10 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: user_code.entry:
; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
+; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
-;
-; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
-; AMDGPU-DISABLED-SAME: () #[[ATTR1:[0-9]+]] {
-; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment)
-; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; AMDGPU-DISABLED: is_worker_check:
-; AMDGPU-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-; AMDGPU-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
-; AMDGPU-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; AMDGPU-DISABLED: worker_state_machine.begin:
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
-; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
-; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
-; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; AMDGPU-DISABLED: worker_state_machine.finished:
-; AMDGPU-DISABLED-NEXT: ret void
-; AMDGPU-DISABLED: worker_state_machine.is_active.check:
-; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; AMDGPU-DISABLED: worker_state_machine.parallel_region.check:
-; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__1_wrapper.ID
-; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
-; AMDGPU-DISABLED: worker_state_machine.parallel_region.execute:
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; AMDGPU-DISABLED: worker_state_machine.parallel_region.fallback.execute:
-; AMDGPU-DISABLED-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; AMDGPU-DISABLED: worker_state_machine.parallel_region.end:
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel()
-; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; AMDGPU-DISABLED: worker_state_machine.done.barrier:
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; AMDGPU-DISABLED: thread.user_code.check:
-; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU-DISABLED: common.ret:
-; AMDGPU-DISABLED-NEXT: ret void
-; AMDGPU-DISABLED: user_code.entry:
-; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]]
-; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
-; NVPTX-DISABLED-SAME: () #[[ATTR1:[0-9]+]] {
-; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment)
-; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; NVPTX-DISABLED: is_worker_check:
-; NVPTX-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-; NVPTX-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
-; NVPTX-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; NVPTX-DISABLED: worker_state_machine.begin:
-; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
-; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
-; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; NVPTX-DISABLED: worker_state_machine.finished:
-; NVPTX-DISABLED-NEXT: ret void
-; NVPTX-DISABLED: worker_state_machine.is_active.check:
-; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; NVPTX-DISABLED: worker_state_machine.parallel_region.check:
-; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__1_wrapper.ID
-; NVPTX-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
-; NVPTX-DISABLED: worker_state_machine.parallel_region.execute:
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; NVPTX-DISABLED: worker_state_machine.parallel_region.fallback.execute:
-; NVPTX-DISABLED-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; NVPTX-DISABLED: worker_state_machine.parallel_region.end:
-; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel()
-; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; NVPTX-DISABLED: worker_state_machine.done.barrier:
-; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; NVPTX-DISABLED: thread.user_code.check:
-; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX-DISABLED: common.ret:
-; NVPTX-DISABLED-NEXT: ret void
-; NVPTX-DISABLED: user_code.entry:
-; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]]
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
@@ -643,10 +500,10 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
@@ -661,10 +518,10 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; NVPTX-NEXT: ret void
; NVPTX: for.body:
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__
; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
@@ -679,10 +536,10 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.
; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: for.body:
-; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__
; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
@@ -697,10 +554,10 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.
; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: for.body:
-; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__
; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
@@ -715,10 +572,10 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.
; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: for.body:
-; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__
; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
@@ -733,45 +590,10 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.
; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: for.body:
-; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
-;
-; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__
-; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU-DISABLED-NEXT: br label [[FOR_COND:%.*]]
-; AMDGPU-DISABLED: for.cond:
-; AMDGPU-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
-; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; AMDGPU-DISABLED: for.cond.cleanup:
-; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
-; AMDGPU-DISABLED-NEXT: ret void
-; AMDGPU-DISABLED: for.body:
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
-; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__
-; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX-DISABLED-NEXT: br label [[FOR_COND:%.*]]
-; NVPTX-DISABLED: for.cond:
-; NVPTX-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
-; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; NVPTX-DISABLED: for.cond.cleanup:
-; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
-; NVPTX-DISABLED-NEXT: ret void
-; NVPTX-DISABLED: for.body:
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
entry:
%captured_vars_addrs = alloca [0 x ptr], align 8
br label %for.cond
@@ -829,17 +651,6 @@ define internal void @__omp_outlined__1(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED2-NEXT: entry:
; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
; NVPTX-DISABLED2-NEXT: ret void
-;
-; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1
-; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
-; AMDGPU-DISABLED-NEXT: ret void
-; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1
-; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
-; NVPTX-DISABLED-NEXT: ret void
entry:
call void @unknown() #11
ret void
@@ -906,25 +717,6 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: ret void
-;
-; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
-; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
-; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-DISABLED-NEXT: ret void
-; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
-; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
-; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-DISABLED-NEXT: ret void
entry:
%.addr1 = alloca i32, align 4
%.zero.addr = alloca i32, align 4
@@ -937,7 +729,7 @@ entry:
}
; Function Attrs: alwaysinline convergent norecurse nounwind
-define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20() #0 {
+define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20() #0 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
; AMDGPU-SAME: () #[[ATTR0]] {
; AMDGPU-NEXT: entry:
@@ -950,7 +742,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2
; AMDGPU-NEXT: ret void
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: br label [[COMMON_RET]]
@@ -967,7 +759,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2
; NVPTX-NEXT: ret void
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: br label [[COMMON_RET]]
@@ -1018,7 +810,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: user_code.entry:
; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
@@ -1069,7 +861,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: user_code.entry:
; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
@@ -1119,7 +911,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: user_code.entry:
; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
@@ -1169,114 +961,10 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: user_code.entry:
; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
-;
-; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
-; AMDGPU-DISABLED-SAME: () #[[ATTR0]] {
-; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment)
-; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; AMDGPU-DISABLED: is_worker_check:
-; AMDGPU-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-; AMDGPU-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
-; AMDGPU-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; AMDGPU-DISABLED: worker_state_machine.begin:
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
-; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
-; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
-; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; AMDGPU-DISABLED: worker_state_machine.finished:
-; AMDGPU-DISABLED-NEXT: ret void
-; AMDGPU-DISABLED: worker_state_machine.is_active.check:
-; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; AMDGPU-DISABLED: worker_state_machine.parallel_region.check:
-; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__3_wrapper.ID
-; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
-; AMDGPU-DISABLED: worker_state_machine.parallel_region.execute:
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; AMDGPU-DISABLED: worker_state_machine.parallel_region.fallback.execute:
-; AMDGPU-DISABLED-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; AMDGPU-DISABLED: worker_state_machine.parallel_region.end:
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel()
-; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; AMDGPU-DISABLED: worker_state_machine.done.barrier:
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; AMDGPU-DISABLED: thread.user_code.check:
-; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU-DISABLED: common.ret:
-; AMDGPU-DISABLED-NEXT: ret void
-; AMDGPU-DISABLED: user_code.entry:
-; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]]
-; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
-; NVPTX-DISABLED-SAME: () #[[ATTR0]] {
-; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment)
-; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; NVPTX-DISABLED: is_worker_check:
-; NVPTX-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-; NVPTX-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
-; NVPTX-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; NVPTX-DISABLED: worker_state_machine.begin:
-; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
-; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
-; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; NVPTX-DISABLED: worker_state_machine.finished:
-; NVPTX-DISABLED-NEXT: ret void
-; NVPTX-DISABLED: worker_state_machine.is_active.check:
-; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; NVPTX-DISABLED: worker_state_machine.parallel_region.check:
-; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__3_wrapper.ID
-; NVPTX-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
-; NVPTX-DISABLED: worker_state_machine.parallel_region.execute:
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; NVPTX-DISABLED: worker_state_machine.parallel_region.fallback.execute:
-; NVPTX-DISABLED-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; NVPTX-DISABLED: worker_state_machine.parallel_region.end:
-; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel()
-; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; NVPTX-DISABLED: worker_state_machine.done.barrier:
-; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; NVPTX-DISABLED: thread.user_code.check:
-; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX-DISABLED: common.ret:
-; NVPTX-DISABLED-NEXT: ret void
-; NVPTX-DISABLED: user_code.entry:
-; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]]
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
@@ -1314,10 +1002,10 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
+; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
@@ -1334,10 +1022,10 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-NEXT: ret void
; NVPTX: for.body:
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
+; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__2
; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
@@ -1355,10 +1043,10 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: for.body:
-; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
+; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__2
; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
@@ -1376,10 +1064,10 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: for.body:
-; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
+; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__2
; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
@@ -1396,10 +1084,10 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: for.body:
-; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
+; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__2
; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
@@ -1416,50 +1104,10 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: for.body:
-; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
-;
-; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2
-; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5)
-; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU-DISABLED-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr
-; AMDGPU-DISABLED-NEXT: call void @use(ptr nocapture [[MALLOC_CAST]]) #[[ATTR7]]
-; AMDGPU-DISABLED-NEXT: br label [[FOR_COND:%.*]]
-; AMDGPU-DISABLED: for.cond:
-; AMDGPU-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
-; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; AMDGPU-DISABLED: for.cond.cleanup:
-; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]]
-; AMDGPU-DISABLED-NEXT: ret void
-; AMDGPU-DISABLED: for.body:
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
-; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2
-; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4
-; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX-DISABLED-NEXT: call void @use(ptr nocapture [[X_H2S]]) #[[ATTR7]]
-; NVPTX-DISABLED-NEXT: br label [[FOR_COND:%.*]]
-; NVPTX-DISABLED: for.cond:
-; NVPTX-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
-; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; NVPTX-DISABLED: for.cond.cleanup:
-; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]]
-; NVPTX-DISABLED-NEXT: ret void
-; NVPTX-DISABLED: for.body:
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
+; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
entry:
%captured_vars_addrs = alloca [0 x ptr], align 8
%x = call align 4 ptr @__kmpc_alloc_shared(i64 4)
@@ -1519,17 +1167,6 @@ define internal void @__omp_outlined__3(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED2-NEXT: entry:
; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-DISABLED2-NEXT: ret void
-;
-; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3
-; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR8]]
-; AMDGPU-DISABLED-NEXT: ret void
-; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3
-; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR8]]
-; NVPTX-DISABLED-NEXT: ret void
entry:
call void @unknown() #11
ret void
@@ -1596,25 +1233,6 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: ret void
-;
-; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
-; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-DISABLED-NEXT: ret void
-; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
-; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-DISABLED-NEXT: ret void
entry:
%.addr1 = alloca i32, align 4
%.zero.addr = alloca i32, align 4
@@ -1628,7 +1246,7 @@ entry:
; Function Attrs: alwaysinline convergent norecurse nounwind
-define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35() #0 {
+define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35() #0 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
; AMDGPU-SAME: () #[[ATTR0]] {
; AMDGPU-NEXT: entry:
@@ -1641,7 +1259,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l
; AMDGPU-NEXT: ret void
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: br label [[COMMON_RET]]
@@ -1658,7 +1276,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l
; NVPTX-NEXT: ret void
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: br label [[COMMON_RET]]
@@ -1709,7 +1327,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: user_code.entry:
; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
@@ -1760,7 +1378,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: user_code.entry:
; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
@@ -1810,7 +1428,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: user_code.entry:
; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
@@ -1860,114 +1478,10 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: user_code.entry:
; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
-;
-; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
-; AMDGPU-DISABLED-SAME: () #[[ATTR0]] {
-; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment)
-; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; AMDGPU-DISABLED: is_worker_check:
-; AMDGPU-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-; AMDGPU-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
-; AMDGPU-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; AMDGPU-DISABLED: worker_state_machine.begin:
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
-; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
-; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
-; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; AMDGPU-DISABLED: worker_state_machine.finished:
-; AMDGPU-DISABLED-NEXT: ret void
-; AMDGPU-DISABLED: worker_state_machine.is_active.check:
-; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; AMDGPU-DISABLED: worker_state_machine.parallel_region.check:
-; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__5_wrapper.ID
-; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
-; AMDGPU-DISABLED: worker_state_machine.parallel_region.execute:
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; AMDGPU-DISABLED: worker_state_machine.parallel_region.fallback.execute:
-; AMDGPU-DISABLED-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; AMDGPU-DISABLED: worker_state_machine.parallel_region.end:
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel()
-; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; AMDGPU-DISABLED: worker_state_machine.done.barrier:
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; AMDGPU-DISABLED: thread.user_code.check:
-; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU-DISABLED: common.ret:
-; AMDGPU-DISABLED-NEXT: ret void
-; AMDGPU-DISABLED: user_code.entry:
-; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]]
-; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
-; NVPTX-DISABLED-SAME: () #[[ATTR0]] {
-; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment)
-; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; NVPTX-DISABLED: is_worker_check:
-; NVPTX-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-; NVPTX-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
-; NVPTX-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; NVPTX-DISABLED: worker_state_machine.begin:
-; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
-; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
-; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; NVPTX-DISABLED: worker_state_machine.finished:
-; NVPTX-DISABLED-NEXT: ret void
-; NVPTX-DISABLED: worker_state_machine.is_active.check:
-; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; NVPTX-DISABLED: worker_state_machine.parallel_region.check:
-; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__5_wrapper.ID
-; NVPTX-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
-; NVPTX-DISABLED: worker_state_machine.parallel_region.execute:
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; NVPTX-DISABLED: worker_state_machine.parallel_region.fallback.execute:
-; NVPTX-DISABLED-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; NVPTX-DISABLED: worker_state_machine.parallel_region.end:
-; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel()
-; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; NVPTX-DISABLED: worker_state_machine.done.barrier:
-; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; NVPTX-DISABLED: thread.user_code.check:
-; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX-DISABLED: common.ret:
-; NVPTX-DISABLED-NEXT: ret void
-; NVPTX-DISABLED: user_code.entry:
-; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]]
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
@@ -2002,11 +1516,11 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
-; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]]
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
+; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
+; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__4
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
@@ -2021,11 +1535,11 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-NEXT: ret void
; NVPTX: for.body:
-; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]]
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
+; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
+; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__4
; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
@@ -2040,11 +1554,11 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: for.body:
-; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]]
-; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
+; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
+; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__4
; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
@@ -2059,11 +1573,11 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: for.body:
-; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]]
-; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
+; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
+; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__4
; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
@@ -2078,11 +1592,11 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: for.body:
-; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]]
-; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
+; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
+; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__4
; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
@@ -2097,48 +1611,11 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: for.body:
-; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]]
-; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
+; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
-;
-; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4
-; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
-; AMDGPU-DISABLED-NEXT: br label [[FOR_COND:%.*]]
-; AMDGPU-DISABLED: for.cond:
-; AMDGPU-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
-; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; AMDGPU-DISABLED: for.cond.cleanup:
-; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]]
-; AMDGPU-DISABLED-NEXT: ret void
-; AMDGPU-DISABLED: for.body:
-; AMDGPU-DISABLED-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]]
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
-; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
-; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4
-; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
-; NVPTX-DISABLED-NEXT: br label [[FOR_COND:%.*]]
-; NVPTX-DISABLED: for.cond:
-; NVPTX-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
-; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; NVPTX-DISABLED: for.cond.cleanup:
-; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]]
-; NVPTX-DISABLED-NEXT: ret void
-; NVPTX-DISABLED: for.body:
-; NVPTX-DISABLED-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]]
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
-; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
+; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
entry:
%captured_vars_addrs = alloca [1 x ptr], align 8
%x = call align 4 ptr @__kmpc_alloc_shared(i64 4)
@@ -2167,73 +1644,56 @@ define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
+; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
+; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5
; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; AMDGPU-DISABLED1-NEXT: entry:
-; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; AMDGPU-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-DISABLED1-NEXT: ret void
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5
; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; AMDGPU-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-DISABLED2-NEXT: ret void
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5
; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; NVPTX-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-DISABLED1-NEXT: ret void
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5
; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; NVPTX-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-DISABLED2-NEXT: ret void
-;
-; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5
-; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
-; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; AMDGPU-DISABLED-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR8]]
-; AMDGPU-DISABLED-NEXT: ret void
-; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5
-; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
-; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; NVPTX-DISABLED-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR8]]
-; NVPTX-DISABLED-NEXT: ret void
entry:
%0 = load i32, ptr %x, align 4, !tbaa !18
%inc = add nsw i32 %0, 1
@@ -2252,7 +1712,7 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
+; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; AMDGPU-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
@@ -2264,7 +1724,7 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
+; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; NVPTX-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
@@ -2276,7 +1736,7 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
+; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: ret void
;
@@ -2288,7 +1748,7 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
+; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: ret void
;
@@ -2300,7 +1760,7 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
+; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: ret void
;
@@ -2312,32 +1772,9 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
+; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: ret void
-;
-; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
-; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
-; AMDGPU-DISABLED-NEXT: ret void
-; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
-; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
-; NVPTX-DISABLED-NEXT: ret void
entry:
%.addr1 = alloca i32, align 4
%.zero.addr = alloca i32, align 4
@@ -2352,7 +1789,7 @@ entry:
}
; Function Attrs: alwaysinline convergent norecurse nounwind
-define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50() #0 {
+define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50() #0 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
; AMDGPU-SAME: () #[[ATTR0]] {
; AMDGPU-NEXT: entry:
@@ -2365,7 +1802,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g
; AMDGPU-NEXT: ret void
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: br label [[COMMON_RET]]
@@ -2382,7 +1819,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g
; NVPTX-NEXT: ret void
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: br label [[COMMON_RET]]
@@ -2433,7 +1870,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: user_code.entry:
; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
@@ -2484,7 +1921,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: user_code.entry:
; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
@@ -2534,7 +1971,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: user_code.entry:
; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
@@ -2584,114 +2021,10 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: user_code.entry:
; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
-;
-; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
-; AMDGPU-DISABLED-SAME: () #[[ATTR0]] {
-; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment)
-; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; AMDGPU-DISABLED: is_worker_check:
-; AMDGPU-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-; AMDGPU-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
-; AMDGPU-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; AMDGPU-DISABLED: worker_state_machine.begin:
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
-; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
-; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
-; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; AMDGPU-DISABLED: worker_state_machine.finished:
-; AMDGPU-DISABLED-NEXT: ret void
-; AMDGPU-DISABLED: worker_state_machine.is_active.check:
-; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; AMDGPU-DISABLED: worker_state_machine.parallel_region.check:
-; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__7_wrapper.ID
-; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
-; AMDGPU-DISABLED: worker_state_machine.parallel_region.execute:
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; AMDGPU-DISABLED: worker_state_machine.parallel_region.fallback.execute:
-; AMDGPU-DISABLED-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; AMDGPU-DISABLED: worker_state_machine.parallel_region.end:
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel()
-; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; AMDGPU-DISABLED: worker_state_machine.done.barrier:
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; AMDGPU-DISABLED: thread.user_code.check:
-; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU-DISABLED: common.ret:
-; AMDGPU-DISABLED-NEXT: ret void
-; AMDGPU-DISABLED: user_code.entry:
-; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]]
-; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
-; NVPTX-DISABLED-SAME: () #[[ATTR0]] {
-; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment)
-; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; NVPTX-DISABLED: is_worker_check:
-; NVPTX-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-; NVPTX-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
-; NVPTX-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; NVPTX-DISABLED: worker_state_machine.begin:
-; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
-; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
-; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; NVPTX-DISABLED: worker_state_machine.finished:
-; NVPTX-DISABLED-NEXT: ret void
-; NVPTX-DISABLED: worker_state_machine.is_active.check:
-; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; NVPTX-DISABLED: worker_state_machine.parallel_region.check:
-; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__7_wrapper.ID
-; NVPTX-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
-; NVPTX-DISABLED: worker_state_machine.parallel_region.execute:
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; NVPTX-DISABLED: worker_state_machine.parallel_region.fallback.execute:
-; NVPTX-DISABLED-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; NVPTX-DISABLED: worker_state_machine.parallel_region.end:
-; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel()
-; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; NVPTX-DISABLED: worker_state_machine.done.barrier:
-; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; NVPTX-DISABLED: thread.user_code.check:
-; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX-DISABLED: common.ret:
-; NVPTX-DISABLED-NEXT: ret void
-; NVPTX-DISABLED: user_code.entry:
-; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]]
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
@@ -2723,7 +2056,7 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
; AMDGPU-NEXT: br i1 [[TMP1]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
; AMDGPU: region.guarded:
-; AMDGPU-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA18]]
+; AMDGPU-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: br label [[REGION_GUARDED_END:%.*]]
; AMDGPU: region.guarded.end:
; AMDGPU-NEXT: br label [[REGION_BARRIER]]
@@ -2740,11 +2073,11 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
-; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]]
-; AMDGPU-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
+; AMDGPU-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
+; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__6
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
@@ -2756,7 +2089,7 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %
; NVPTX-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
; NVPTX-NEXT: br i1 [[TMP1]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
; NVPTX: region.guarded:
-; NVPTX-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA18]]
+; NVPTX-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: br label [[REGION_GUARDED_END:%.*]]
; NVPTX: region.guarded.end:
; NVPTX-NEXT: br label [[REGION_BARRIER]]
@@ -2773,17 +2106,17 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-NEXT: ret void
; NVPTX: for.body:
-; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]]
-; NVPTX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
+; NVPTX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
+; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__6
; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED1-NEXT: entry:
; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
-; AMDGPU-DISABLED1-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED1-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
; AMDGPU-DISABLED1: for.cond:
; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -2793,17 +2126,17 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: for.body:
-; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]]
-; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
+; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
+; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__6
; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED2-NEXT: entry:
; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
-; AMDGPU-DISABLED2-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED2-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
; AMDGPU-DISABLED2: for.cond:
; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -2813,17 +2146,17 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: for.body:
-; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]]
-; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
+; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
+; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__6
; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED1-NEXT: entry:
; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
-; NVPTX-DISABLED1-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED1-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
; NVPTX-DISABLED1: for.cond:
; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -2833,17 +2166,17 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: for.body:
-; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]]
-; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
+; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
+; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__6
; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED2-NEXT: entry:
; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
-; NVPTX-DISABLED2-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED2-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
; NVPTX-DISABLED2: for.cond:
; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -2853,50 +2186,11 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: for.body:
-; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]]
-; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
+; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
-;
-; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6
-; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
-; AMDGPU-DISABLED-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED-NEXT: br label [[FOR_COND:%.*]]
-; AMDGPU-DISABLED: for.cond:
-; AMDGPU-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
-; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; AMDGPU-DISABLED: for.cond.cleanup:
-; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]]
-; AMDGPU-DISABLED-NEXT: ret void
-; AMDGPU-DISABLED: for.body:
-; AMDGPU-DISABLED-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]]
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
-; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
-; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6
-; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
-; NVPTX-DISABLED-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED-NEXT: br label [[FOR_COND:%.*]]
-; NVPTX-DISABLED: for.cond:
-; NVPTX-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
-; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; NVPTX-DISABLED: for.cond.cleanup:
-; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]]
-; NVPTX-DISABLED-NEXT: ret void
-; NVPTX-DISABLED: for.body:
-; NVPTX-DISABLED-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]]
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
-; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
+; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
entry:
%captured_vars_addrs = alloca [1 x ptr], align 8
%x = call align 4 ptr @__kmpc_alloc_shared(i64 4)
@@ -2926,73 +2220,56 @@ define internal void @__omp_outlined__7(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
+; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
+; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7
; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; AMDGPU-DISABLED1-NEXT: entry:
-; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; AMDGPU-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
; AMDGPU-DISABLED1-NEXT: ret void
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7
; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; AMDGPU-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
; AMDGPU-DISABLED2-NEXT: ret void
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7
; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; NVPTX-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
; NVPTX-DISABLED1-NEXT: ret void
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7
; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; NVPTX-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
; NVPTX-DISABLED2-NEXT: ret void
-;
-; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7
-; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
-; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; AMDGPU-DISABLED-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
-; AMDGPU-DISABLED-NEXT: ret void
-; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7
-; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
-; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; NVPTX-DISABLED-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
-; NVPTX-DISABLED-NEXT: ret void
entry:
%0 = load i32, ptr %x, align 4, !tbaa !18
%inc = add nsw i32 %0, 1
@@ -3011,7 +2288,7 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
+; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; AMDGPU-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
@@ -3023,7 +2300,7 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
+; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; NVPTX-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
@@ -3035,7 +2312,7 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
+; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: ret void
;
@@ -3047,7 +2324,7 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
+; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: ret void
;
@@ -3059,7 +2336,7 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
+; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: ret void
;
@@ -3071,32 +2348,9 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
+; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: ret void
-;
-; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
-; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
-; AMDGPU-DISABLED-NEXT: ret void
-; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
-; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
-; NVPTX-DISABLED-NEXT: ret void
entry:
%.addr1 = alloca i32, align 4
%.zero.addr = alloca i32, align 4
@@ -3111,7 +2365,7 @@ entry:
}
; Function Attrs: alwaysinline convergent norecurse nounwind
-define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 {
+define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
; AMDGPU-SAME: () #[[ATTR0]] {
; AMDGPU-NEXT: entry:
@@ -3384,96 +2638,6 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
-;
-; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
-; AMDGPU-DISABLED-SAME: () #[[ATTR0]] {
-; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment)
-; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; AMDGPU-DISABLED: is_worker_check:
-; AMDGPU-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-; AMDGPU-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
-; AMDGPU-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; AMDGPU-DISABLED: worker_state_machine.begin:
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
-; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
-; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
-; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; AMDGPU-DISABLED: worker_state_machine.finished:
-; AMDGPU-DISABLED-NEXT: ret void
-; AMDGPU-DISABLED: worker_state_machine.is_active.check:
-; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; AMDGPU-DISABLED: worker_state_machine.parallel_region.fallback.execute:
-; AMDGPU-DISABLED-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; AMDGPU-DISABLED: worker_state_machine.parallel_region.end:
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel()
-; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; AMDGPU-DISABLED: worker_state_machine.done.barrier:
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; AMDGPU-DISABLED: thread.user_code.check:
-; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU-DISABLED: common.ret:
-; AMDGPU-DISABLED-NEXT: ret void
-; AMDGPU-DISABLED: user_code.entry:
-; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]]
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]]
-; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
-; NVPTX-DISABLED-SAME: () #[[ATTR0]] {
-; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment)
-; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; NVPTX-DISABLED: is_worker_check:
-; NVPTX-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-; NVPTX-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
-; NVPTX-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; NVPTX-DISABLED: worker_state_machine.begin:
-; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
-; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
-; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; NVPTX-DISABLED: worker_state_machine.finished:
-; NVPTX-DISABLED-NEXT: ret void
-; NVPTX-DISABLED: worker_state_machine.is_active.check:
-; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; NVPTX-DISABLED: worker_state_machine.parallel_region.fallback.execute:
-; NVPTX-DISABLED-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; NVPTX-DISABLED: worker_state_machine.parallel_region.end:
-; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel()
-; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; NVPTX-DISABLED: worker_state_machine.done.barrier:
-; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; NVPTX-DISABLED: thread.user_code.check:
-; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX-DISABLED: common.ret:
-; NVPTX-DISABLED-NEXT: ret void
-; NVPTX-DISABLED: user_code.entry:
-; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]]
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]]
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
@@ -3530,24 +2694,13 @@ define internal void @__omp_outlined__8(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED2-NEXT: entry:
; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-DISABLED2-NEXT: ret void
-;
-; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8
-; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR8]]
-; AMDGPU-DISABLED-NEXT: ret void
-; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8
-; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR8]]
-; NVPTX-DISABLED-NEXT: ret void
entry:
call void @unknown() #11
ret void
}
; Function Attrs: alwaysinline convergent norecurse nounwind
-define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 {
+define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
; AMDGPU-SAME: () #[[ATTR0]] {
; AMDGPU-NEXT: entry:
@@ -3862,110 +3015,6 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 {
; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
-;
-; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
-; AMDGPU-DISABLED-SAME: () #[[ATTR0]] {
-; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment)
-; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; AMDGPU-DISABLED: is_worker_check:
-; AMDGPU-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-; AMDGPU-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
-; AMDGPU-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; AMDGPU-DISABLED: worker_state_machine.begin:
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
-; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
-; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
-; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; AMDGPU-DISABLED: worker_state_machine.finished:
-; AMDGPU-DISABLED-NEXT: ret void
-; AMDGPU-DISABLED: worker_state_machine.is_active.check:
-; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; AMDGPU-DISABLED: worker_state_machine.parallel_region.check:
-; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__9_wrapper.ID
-; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
-; AMDGPU-DISABLED: worker_state_machine.parallel_region.execute:
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; AMDGPU-DISABLED: worker_state_machine.parallel_region.fallback.execute:
-; AMDGPU-DISABLED-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; AMDGPU-DISABLED: worker_state_machine.parallel_region.end:
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel()
-; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; AMDGPU-DISABLED: worker_state_machine.done.barrier:
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; AMDGPU-DISABLED: thread.user_code.check:
-; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU-DISABLED: common.ret:
-; AMDGPU-DISABLED-NEXT: ret void
-; AMDGPU-DISABLED: user_code.entry:
-; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
-; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]]
-; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
-; NVPTX-DISABLED-SAME: () #[[ATTR0]] {
-; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment)
-; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; NVPTX-DISABLED: is_worker_check:
-; NVPTX-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-; NVPTX-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
-; NVPTX-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; NVPTX-DISABLED: worker_state_machine.begin:
-; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
-; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
-; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; NVPTX-DISABLED: worker_state_machine.finished:
-; NVPTX-DISABLED-NEXT: ret void
-; NVPTX-DISABLED: worker_state_machine.is_active.check:
-; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; NVPTX-DISABLED: worker_state_machine.parallel_region.check:
-; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__9_wrapper.ID
-; NVPTX-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
-; NVPTX-DISABLED: worker_state_machine.parallel_region.execute:
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; NVPTX-DISABLED: worker_state_machine.parallel_region.fallback.execute:
-; NVPTX-DISABLED-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; NVPTX-DISABLED: worker_state_machine.parallel_region.end:
-; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel()
-; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; NVPTX-DISABLED: worker_state_machine.done.barrier:
-; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; NVPTX-DISABLED: thread.user_code.check:
-; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX-DISABLED: common.ret:
-; NVPTX-DISABLED-NEXT: ret void
-; NVPTX-DISABLED: user_code.entry:
-; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
-; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]]
entry:
%captured_vars_addrs = alloca [0 x ptr], align 8
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
@@ -4021,17 +3070,6 @@ define internal void @.omp_outlined.(i32 %.global_tid., ptr noalias %.part_id.,
; NVPTX-DISABLED2-NEXT: entry:
; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED2-NEXT: ret void
-;
-; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@.omp_outlined.
-; AMDGPU-DISABLED-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
-; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]]
-; AMDGPU-DISABLED-NEXT: ret void
-; NVPTX-DISABLED-LABEL: define {{[^@]+}}@.omp_outlined.
-; NVPTX-DISABLED-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
-; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]]
-; NVPTX-DISABLED-NEXT: ret void
entry:
call void @spmd_amenable() #10
ret void
@@ -4093,13 +3131,6 @@ define weak i32 @__kmpc_target_init(ptr, ptr) {
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__kmpc_target_init
; NVPTX-DISABLED2-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
; NVPTX-DISABLED2-NEXT: ret i32 0
-;
-; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init
-; AMDGPU-DISABLED-SAME: (ptr [[TMP0:%.*]]) {
-; AMDGPU-DISABLED-NEXT: ret i32 0
-; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init
-; NVPTX-DISABLED-SAME: (ptr [[TMP0:%.*]]) {
-; NVPTX-DISABLED-NEXT: ret i32 0
ret i32 0
}
@@ -4158,16 +3189,6 @@ define internal void @__omp_outlined__9(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-DISABLED2-NEXT: ret void
;
-; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9
-; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR8]]
-; AMDGPU-DISABLED-NEXT: ret void
-; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9
-; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR8]]
-; NVPTX-DISABLED-NEXT: ret void
entry:
call void @unknown() #11
ret void
@@ -4234,25 +3255,6 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: ret void
-;
-; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
-; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-DISABLED-NEXT: ret void
-; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
-; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-DISABLED-NEXT: ret void
entry:
%.addr1 = alloca i32, align 4
%.zero.addr = alloca i32, align 4
@@ -4280,7 +3282,6 @@ attributes #10 = { convergent "llvm.assume"="ompx_spmd_amenable" }
attributes #11 = { convergent }
!omp_offload.info = !{!0, !1, !2, !3, !4, !5}
-!nvvm.annotations = !{!6, !7, !8, !9, !10, !11}
!llvm.module.flags = !{!12, !13, !14, !15, !16}
!llvm.ident = !{!17}
@@ -4290,12 +3291,6 @@ attributes #11 = { convergent }
!3 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
!4 = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
!5 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
-!6 = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
-!7 = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
-!8 = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
-!9 = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
-!10 = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
-!11 = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
!12 = !{i32 1, !"wchar_size", i32 4}
!13 = !{i32 7, !"openmp", i32 50}
!14 = !{i32 7, !"openmp-device", i32 50}
@@ -4317,92 +3312,6 @@ attributes #11 = { convergent }
!30 = !{!31, !27, i64 0}
!31 = !{!"kmp_task_t_with_privates", !32, i64 0}
!32 = !{!"kmp_task_t", !27, i64 0, !27, i64 8, !19, i64 16, !20, i64 24, !20, i64 32}
-; AMDGPU-DISABLED: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind }
-; AMDGPU-DISABLED: attributes #[[ATTR1]] = { norecurse }
-; AMDGPU-DISABLED: attributes #[[ATTR2]] = { convergent norecurse nounwind }
-; AMDGPU-DISABLED: attributes #[[ATTR3]] = { alwaysinline norecurse nounwind }
-; AMDGPU-DISABLED: attributes #[[ATTR4]] = { alwaysinline convergent nounwind }
-; AMDGPU-DISABLED: attributes #[[ATTR5]] = { nounwind }
-; AMDGPU-DISABLED: attributes #[[ATTR6:[0-9]+]] = { nosync nounwind }
-; AMDGPU-DISABLED: attributes #[[ATTR7:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
-; AMDGPU-DISABLED: attributes #[[ATTR8]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
-; AMDGPU-DISABLED: attributes #[[ATTR9]] = { convergent }
-; AMDGPU-DISABLED: attributes #[[ATTR10:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
-; AMDGPU-DISABLED: attributes #[[ATTR11:[0-9]+]] = { alwaysinline }
-; AMDGPU-DISABLED: attributes #[[ATTR12:[0-9]+]] = { convergent nounwind }
-; NVPTX-DISABLED: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind }
-; NVPTX-DISABLED: attributes #[[ATTR1]] = { norecurse }
-; NVPTX-DISABLED: attributes #[[ATTR2]] = { convergent norecurse nounwind }
-; NVPTX-DISABLED: attributes #[[ATTR3]] = { alwaysinline norecurse nounwind }
-; NVPTX-DISABLED: attributes #[[ATTR4]] = { alwaysinline convergent nounwind }
-; NVPTX-DISABLED: attributes #[[ATTR5]] = { nounwind }
-; NVPTX-DISABLED: attributes #[[ATTR6:[0-9]+]] = { nosync nounwind }
-; NVPTX-DISABLED: attributes #[[ATTR7:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
-; NVPTX-DISABLED: attributes #[[ATTR8]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
-; NVPTX-DISABLED: attributes #[[ATTR9]] = { convergent }
-; NVPTX-DISABLED: attributes #[[ATTR10:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
-; NVPTX-DISABLED: attributes #[[ATTR11:[0-9]+]] = { alwaysinline }
-; NVPTX-DISABLED: attributes #[[ATTR12:[0-9]+]] = { convergent nounwind }
-; AMDGPU-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
-; AMDGPU-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
-; AMDGPU-DISABLED: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
-; AMDGPU-DISABLED: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
-; AMDGPU-DISABLED: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
-; AMDGPU-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
-; AMDGPU-DISABLED: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; AMDGPU-DISABLED: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; AMDGPU-DISABLED: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; AMDGPU-DISABLED: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; AMDGPU-DISABLED: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; AMDGPU-DISABLED: [[META17:![0-9]+]] = !{!"clang version 14.0.0"}
-; AMDGPU-DISABLED: [[TBAA18]] = !{!19, !19, i64 0}
-; AMDGPU-DISABLED: [[META19:![0-9]+]] = !{!"int", !20, i64 0}
-; AMDGPU-DISABLED: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0}
-; AMDGPU-DISABLED: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"}
-; AMDGPU-DISABLED: [[LOOP22]] = distinct !{!22, !23, !24}
-; AMDGPU-DISABLED: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"}
-; AMDGPU-DISABLED: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"}
-; AMDGPU-DISABLED: [[LOOP25]] = distinct !{!25, !23, !24}
-; AMDGPU-DISABLED: [[TBAA26]] = !{!27, !27, i64 0}
-; AMDGPU-DISABLED: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0}
-; AMDGPU-DISABLED: [[LOOP28]] = distinct !{!28, !23, !24}
-; AMDGPU-DISABLED: [[LOOP29]] = distinct !{!29, !23, !24}
-; NVPTX-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
-; NVPTX-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
-; NVPTX-DISABLED: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
-; NVPTX-DISABLED: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
-; NVPTX-DISABLED: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
-; NVPTX-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
-; NVPTX-DISABLED: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; NVPTX-DISABLED: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; NVPTX-DISABLED: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; NVPTX-DISABLED: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; NVPTX-DISABLED: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; NVPTX-DISABLED: [[META17:![0-9]+]] = !{!"clang version 14.0.0"}
-; NVPTX-DISABLED: [[TBAA18]] = !{!19, !19, i64 0}
-; NVPTX-DISABLED: [[META19:![0-9]+]] = !{!"int", !20, i64 0}
-; NVPTX-DISABLED: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0}
-; NVPTX-DISABLED: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"}
-; NVPTX-DISABLED: [[LOOP22]] = distinct !{!22, !23, !24}
-; NVPTX-DISABLED: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"}
-; NVPTX-DISABLED: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"}
-; NVPTX-DISABLED: [[LOOP25]] = distinct !{!25, !23, !24}
-; NVPTX-DISABLED: [[TBAA26]] = !{!27, !27, i64 0}
-; NVPTX-DISABLED: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0}
-; NVPTX-DISABLED: [[LOOP28]] = distinct !{!28, !23, !24}
-; NVPTX-DISABLED: [[LOOP29]] = distinct !{!29, !23, !24}
;.
; AMDGPU: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
; AMDGPU: attributes #[[ATTR1]] = { norecurse }
@@ -4488,30 +3397,24 @@ attributes #11 = { convergent }
; AMDGPU: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; AMDGPU: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; AMDGPU: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
-; AMDGPU: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
-; AMDGPU: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
-; AMDGPU: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
-; AMDGPU: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
-; AMDGPU: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
-; AMDGPU: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
-; AMDGPU: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; AMDGPU: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; AMDGPU: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; AMDGPU: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; AMDGPU: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; AMDGPU: [[META17:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-; AMDGPU: [[TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0}
-; AMDGPU: [[META19]] = !{!"int", [[META20:![0-9]+]], i64 0}
-; AMDGPU: [[META20]] = !{!"omnipotent char", [[META21:![0-9]+]], i64 0}
-; AMDGPU: [[META21]] = !{!"Simple C/C++ TBAA"}
-; AMDGPU: [[LOOP22]] = distinct !{[[LOOP22]], [[META23:![0-9]+]], [[META24:![0-9]+]]}
-; AMDGPU: [[META23]] = !{!"llvm.loop.mustprogress"}
-; AMDGPU: [[META24]] = !{!"llvm.loop.unroll.disable"}
-; AMDGPU: [[LOOP25]] = distinct !{[[LOOP25]], [[META23]], [[META24]]}
-; AMDGPU: [[TBAA26]] = !{[[META27:![0-9]+]], [[META27]], i64 0}
-; AMDGPU: [[META27]] = !{!"any pointer", [[META20]], i64 0}
-; AMDGPU: [[LOOP28]] = distinct !{[[LOOP28]], [[META23]], [[META24]]}
-; AMDGPU: [[LOOP29]] = distinct !{[[LOOP29]], [[META23]], [[META24]]}
+; AMDGPU: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; AMDGPU: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; AMDGPU: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; AMDGPU: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
+; AMDGPU: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
+; AMDGPU: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+; AMDGPU: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
+; AMDGPU: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
+; AMDGPU: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
+; AMDGPU: [[META15]] = !{!"Simple C/C++ TBAA"}
+; AMDGPU: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
+; AMDGPU: [[META17]] = !{!"llvm.loop.mustprogress"}
+; AMDGPU: [[META18]] = !{!"llvm.loop.unroll.disable"}
+; AMDGPU: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
+; AMDGPU: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
+; AMDGPU: [[META21]] = !{!"any pointer", [[META14]], i64 0}
+; AMDGPU: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
+; AMDGPU: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
;.
; NVPTX: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; NVPTX: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
@@ -4519,30 +3422,24 @@ attributes #11 = { convergent }
; NVPTX: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; NVPTX: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; NVPTX: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
-; NVPTX: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
-; NVPTX: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
-; NVPTX: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
-; NVPTX: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
-; NVPTX: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
-; NVPTX: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
-; NVPTX: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; NVPTX: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; NVPTX: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; NVPTX: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; NVPTX: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; NVPTX: [[META17:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-; NVPTX: [[TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0}
-; NVPTX: [[META19]] = !{!"int", [[META20:![0-9]+]], i64 0}
-; NVPTX: [[META20]] = !{!"omnipotent char", [[META21:![0-9]+]], i64 0}
-; NVPTX: [[META21]] = !{!"Simple C/C++ TBAA"}
-; NVPTX: [[LOOP22]] = distinct !{[[LOOP22]], [[META23:![0-9]+]], [[META24:![0-9]+]]}
-; NVPTX: [[META23]] = !{!"llvm.loop.mustprogress"}
-; NVPTX: [[META24]] = !{!"llvm.loop.unroll.disable"}
-; NVPTX: [[LOOP25]] = distinct !{[[LOOP25]], [[META23]], [[META24]]}
-; NVPTX: [[TBAA26]] = !{[[META27:![0-9]+]], [[META27]], i64 0}
-; NVPTX: [[META27]] = !{!"any pointer", [[META20]], i64 0}
-; NVPTX: [[LOOP28]] = distinct !{[[LOOP28]], [[META23]], [[META24]]}
-; NVPTX: [[LOOP29]] = distinct !{[[LOOP29]], [[META23]], [[META24]]}
+; NVPTX: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; NVPTX: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; NVPTX: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; NVPTX: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
+; NVPTX: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
+; NVPTX: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+; NVPTX: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
+; NVPTX: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
+; NVPTX: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
+; NVPTX: [[META15]] = !{!"Simple C/C++ TBAA"}
+; NVPTX: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
+; NVPTX: [[META17]] = !{!"llvm.loop.mustprogress"}
+; NVPTX: [[META18]] = !{!"llvm.loop.unroll.disable"}
+; NVPTX: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
+; NVPTX: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
+; NVPTX: [[META21]] = !{!"any pointer", [[META14]], i64 0}
+; NVPTX: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
+; NVPTX: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
;.
; AMDGPU-DISABLED1: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; AMDGPU-DISABLED1: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
@@ -4550,30 +3447,24 @@ attributes #11 = { convergent }
; AMDGPU-DISABLED1: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; AMDGPU-DISABLED1: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; AMDGPU-DISABLED1: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
-; AMDGPU-DISABLED1: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
-; AMDGPU-DISABLED1: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
-; AMDGPU-DISABLED1: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
-; AMDGPU-DISABLED1: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
-; AMDGPU-DISABLED1: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
-; AMDGPU-DISABLED1: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
-; AMDGPU-DISABLED1: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; AMDGPU-DISABLED1: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; AMDGPU-DISABLED1: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; AMDGPU-DISABLED1: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; AMDGPU-DISABLED1: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; AMDGPU-DISABLED1: [[META17:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-; AMDGPU-DISABLED1: [[TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0}
-; AMDGPU-DISABLED1: [[META19]] = !{!"int", [[META20:![0-9]+]], i64 0}
-; AMDGPU-DISABLED1: [[META20]] = !{!"omnipotent char", [[META21:![0-9]+]], i64 0}
-; AMDGPU-DISABLED1: [[META21]] = !{!"Simple C/C++ TBAA"}
-; AMDGPU-DISABLED1: [[LOOP22]] = distinct !{[[LOOP22]], [[META23:![0-9]+]], [[META24:![0-9]+]]}
-; AMDGPU-DISABLED1: [[META23]] = !{!"llvm.loop.mustprogress"}
-; AMDGPU-DISABLED1: [[META24]] = !{!"llvm.loop.unroll.disable"}
-; AMDGPU-DISABLED1: [[LOOP25]] = distinct !{[[LOOP25]], [[META23]], [[META24]]}
-; AMDGPU-DISABLED1: [[TBAA26]] = !{[[META27:![0-9]+]], [[META27]], i64 0}
-; AMDGPU-DISABLED1: [[META27]] = !{!"any pointer", [[META20]], i64 0}
-; AMDGPU-DISABLED1: [[LOOP28]] = distinct !{[[LOOP28]], [[META23]], [[META24]]}
-; AMDGPU-DISABLED1: [[LOOP29]] = distinct !{[[LOOP29]], [[META23]], [[META24]]}
+; AMDGPU-DISABLED1: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; AMDGPU-DISABLED1: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; AMDGPU-DISABLED1: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; AMDGPU-DISABLED1: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
+; AMDGPU-DISABLED1: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
+; AMDGPU-DISABLED1: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+; AMDGPU-DISABLED1: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
+; AMDGPU-DISABLED1: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
+; AMDGPU-DISABLED1: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
+; AMDGPU-DISABLED1: [[META15]] = !{!"Simple C/C++ TBAA"}
+; AMDGPU-DISABLED1: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
+; AMDGPU-DISABLED1: [[META17]] = !{!"llvm.loop.mustprogress"}
+; AMDGPU-DISABLED1: [[META18]] = !{!"llvm.loop.unroll.disable"}
+; AMDGPU-DISABLED1: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
+; AMDGPU-DISABLED1: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
+; AMDGPU-DISABLED1: [[META21]] = !{!"any pointer", [[META14]], i64 0}
+; AMDGPU-DISABLED1: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
+; AMDGPU-DISABLED1: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
;.
; AMDGPU-DISABLED2: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; AMDGPU-DISABLED2: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
@@ -4581,30 +3472,24 @@ attributes #11 = { convergent }
; AMDGPU-DISABLED2: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; AMDGPU-DISABLED2: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; AMDGPU-DISABLED2: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
-; AMDGPU-DISABLED2: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
-; AMDGPU-DISABLED2: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
-; AMDGPU-DISABLED2: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
-; AMDGPU-DISABLED2: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
-; AMDGPU-DISABLED2: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
-; AMDGPU-DISABLED2: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
-; AMDGPU-DISABLED2: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; AMDGPU-DISABLED2: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; AMDGPU-DISABLED2: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; AMDGPU-DISABLED2: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; AMDGPU-DISABLED2: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; AMDGPU-DISABLED2: [[META17:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-; AMDGPU-DISABLED2: [[TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0}
-; AMDGPU-DISABLED2: [[META19]] = !{!"int", [[META20:![0-9]+]], i64 0}
-; AMDGPU-DISABLED2: [[META20]] = !{!"omnipotent char", [[META21:![0-9]+]], i64 0}
-; AMDGPU-DISABLED2: [[META21]] = !{!"Simple C/C++ TBAA"}
-; AMDGPU-DISABLED2: [[LOOP22]] = distinct !{[[LOOP22]], [[META23:![0-9]+]], [[META24:![0-9]+]]}
-; AMDGPU-DISABLED2: [[META23]] = !{!"llvm.loop.mustprogress"}
-; AMDGPU-DISABLED2: [[META24]] = !{!"llvm.loop.unroll.disable"}
-; AMDGPU-DISABLED2: [[LOOP25]] = distinct !{[[LOOP25]], [[META23]], [[META24]]}
-; AMDGPU-DISABLED2: [[TBAA26]] = !{[[META27:![0-9]+]], [[META27]], i64 0}
-; AMDGPU-DISABLED2: [[META27]] = !{!"any pointer", [[META20]], i64 0}
-; AMDGPU-DISABLED2: [[LOOP28]] = distinct !{[[LOOP28]], [[META23]], [[META24]]}
-; AMDGPU-DISABLED2: [[LOOP29]] = distinct !{[[LOOP29]], [[META23]], [[META24]]}
+; AMDGPU-DISABLED2: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; AMDGPU-DISABLED2: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; AMDGPU-DISABLED2: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; AMDGPU-DISABLED2: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
+; AMDGPU-DISABLED2: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
+; AMDGPU-DISABLED2: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+; AMDGPU-DISABLED2: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
+; AMDGPU-DISABLED2: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
+; AMDGPU-DISABLED2: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
+; AMDGPU-DISABLED2: [[META15]] = !{!"Simple C/C++ TBAA"}
+; AMDGPU-DISABLED2: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
+; AMDGPU-DISABLED2: [[META17]] = !{!"llvm.loop.mustprogress"}
+; AMDGPU-DISABLED2: [[META18]] = !{!"llvm.loop.unroll.disable"}
+; AMDGPU-DISABLED2: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
+; AMDGPU-DISABLED2: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
+; AMDGPU-DISABLED2: [[META21]] = !{!"any pointer", [[META14]], i64 0}
+; AMDGPU-DISABLED2: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
+; AMDGPU-DISABLED2: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
;.
; NVPTX-DISABLED1: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; NVPTX-DISABLED1: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
@@ -4612,30 +3497,24 @@ attributes #11 = { convergent }
; NVPTX-DISABLED1: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; NVPTX-DISABLED1: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; NVPTX-DISABLED1: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
-; NVPTX-DISABLED1: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
-; NVPTX-DISABLED1: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
-; NVPTX-DISABLED1: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
-; NVPTX-DISABLED1: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
-; NVPTX-DISABLED1: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
-; NVPTX-DISABLED1: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
-; NVPTX-DISABLED1: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; NVPTX-DISABLED1: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; NVPTX-DISABLED1: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; NVPTX-DISABLED1: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; NVPTX-DISABLED1: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; NVPTX-DISABLED1: [[META17:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-; NVPTX-DISABLED1: [[TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0}
-; NVPTX-DISABLED1: [[META19]] = !{!"int", [[META20:![0-9]+]], i64 0}
-; NVPTX-DISABLED1: [[META20]] = !{!"omnipotent char", [[META21:![0-9]+]], i64 0}
-; NVPTX-DISABLED1: [[META21]] = !{!"Simple C/C++ TBAA"}
-; NVPTX-DISABLED1: [[LOOP22]] = distinct !{[[LOOP22]], [[META23:![0-9]+]], [[META24:![0-9]+]]}
-; NVPTX-DISABLED1: [[META23]] = !{!"llvm.loop.mustprogress"}
-; NVPTX-DISABLED1: [[META24]] = !{!"llvm.loop.unroll.disable"}
-; NVPTX-DISABLED1: [[LOOP25]] = distinct !{[[LOOP25]], [[META23]], [[META24]]}
-; NVPTX-DISABLED1: [[TBAA26]] = !{[[META27:![0-9]+]], [[META27]], i64 0}
-; NVPTX-DISABLED1: [[META27]] = !{!"any pointer", [[META20]], i64 0}
-; NVPTX-DISABLED1: [[LOOP28]] = distinct !{[[LOOP28]], [[META23]], [[META24]]}
-; NVPTX-DISABLED1: [[LOOP29]] = distinct !{[[LOOP29]], [[META23]], [[META24]]}
+; NVPTX-DISABLED1: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; NVPTX-DISABLED1: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; NVPTX-DISABLED1: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; NVPTX-DISABLED1: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
+; NVPTX-DISABLED1: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
+; NVPTX-DISABLED1: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+; NVPTX-DISABLED1: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
+; NVPTX-DISABLED1: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
+; NVPTX-DISABLED1: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
+; NVPTX-DISABLED1: [[META15]] = !{!"Simple C/C++ TBAA"}
+; NVPTX-DISABLED1: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
+; NVPTX-DISABLED1: [[META17]] = !{!"llvm.loop.mustprogress"}
+; NVPTX-DISABLED1: [[META18]] = !{!"llvm.loop.unroll.disable"}
+; NVPTX-DISABLED1: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
+; NVPTX-DISABLED1: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
+; NVPTX-DISABLED1: [[META21]] = !{!"any pointer", [[META14]], i64 0}
+; NVPTX-DISABLED1: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
+; NVPTX-DISABLED1: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
;.
; NVPTX-DISABLED2: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; NVPTX-DISABLED2: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
@@ -4643,28 +3522,22 @@ attributes #11 = { convergent }
; NVPTX-DISABLED2: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; NVPTX-DISABLED2: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; NVPTX-DISABLED2: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
-; NVPTX-DISABLED2: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
-; NVPTX-DISABLED2: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
-; NVPTX-DISABLED2: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
-; NVPTX-DISABLED2: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
-; NVPTX-DISABLED2: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
-; NVPTX-DISABLED2: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
-; NVPTX-DISABLED2: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; NVPTX-DISABLED2: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; NVPTX-DISABLED2: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; NVPTX-DISABLED2: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; NVPTX-DISABLED2: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; NVPTX-DISABLED2: [[META17:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-; NVPTX-DISABLED2: [[TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0}
-; NVPTX-DISABLED2: [[META19]] = !{!"int", [[META20:![0-9]+]], i64 0}
-; NVPTX-DISABLED2: [[META20]] = !{!"omnipotent char", [[META21:![0-9]+]], i64 0}
-; NVPTX-DISABLED2: [[META21]] = !{!"Simple C/C++ TBAA"}
-; NVPTX-DISABLED2: [[LOOP22]] = distinct !{[[LOOP22]], [[META23:![0-9]+]], [[META24:![0-9]+]]}
-; NVPTX-DISABLED2: [[META23]] = !{!"llvm.loop.mustprogress"}
-; NVPTX-DISABLED2: [[META24]] = !{!"llvm.loop.unroll.disable"}
-; NVPTX-DISABLED2: [[LOOP25]] = distinct !{[[LOOP25]], [[META23]], [[META24]]}
-; NVPTX-DISABLED2: [[TBAA26]] = !{[[META27:![0-9]+]], [[META27]], i64 0}
-; NVPTX-DISABLED2: [[META27]] = !{!"any pointer", [[META20]], i64 0}
-; NVPTX-DISABLED2: [[LOOP28]] = distinct !{[[LOOP28]], [[META23]], [[META24]]}
-; NVPTX-DISABLED2: [[LOOP29]] = distinct !{[[LOOP29]], [[META23]], [[META24]]}
+; NVPTX-DISABLED2: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; NVPTX-DISABLED2: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; NVPTX-DISABLED2: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; NVPTX-DISABLED2: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
+; NVPTX-DISABLED2: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
+; NVPTX-DISABLED2: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+; NVPTX-DISABLED2: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
+; NVPTX-DISABLED2: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
+; NVPTX-DISABLED2: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
+; NVPTX-DISABLED2: [[META15]] = !{!"Simple C/C++ TBAA"}
+; NVPTX-DISABLED2: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
+; NVPTX-DISABLED2: [[META17]] = !{!"llvm.loop.mustprogress"}
+; NVPTX-DISABLED2: [[META18]] = !{!"llvm.loop.unroll.disable"}
+; NVPTX-DISABLED2: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
+; NVPTX-DISABLED2: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
+; NVPTX-DISABLED2: [[META21]] = !{!"any pointer", [[META14]], i64 0}
+; NVPTX-DISABLED2: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
+; NVPTX-DISABLED2: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
;.
diff --git a/llvm/test/Transforms/OpenMP/spmdization_assumes.ll b/llvm/test/Transforms/OpenMP/spmdization_assumes.ll
index 2f43a4e4286a23..99715cf5b4032c 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_assumes.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_assumes.ll
@@ -28,7 +28,7 @@ target triple = "nvptx64"
; CHECK: @__omp_offloading_fd02_404433c2_main_l5_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
;.
-define weak void @__omp_offloading_fd02_404433c2_main_l5(ptr %dyn, ptr nonnull align 8 dereferenceable(8) %x) local_unnamed_addr #0 {
+define weak ptx_kernel void @__omp_offloading_fd02_404433c2_main_l5(ptr %dyn, ptr nonnull align 8 dereferenceable(8) %x) local_unnamed_addr #0 {
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd02_404433c2_main_l5
; CHECK-SAME: (ptr [[DYN:%.*]], ptr nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
@@ -47,7 +47,7 @@ define weak void @__omp_offloading_fd02_404433c2_main_l5(ptr %dyn, ptr nonnull a
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
; CHECK: region.guarded:
-; CHECK-NEXT: store double [[CALL_I]], ptr [[X]], align 8, !tbaa [[TBAA8:![0-9]+]]
+; CHECK-NEXT: store double [[CALL_I]], ptr [[X]], align 8, !tbaa [[TBAA7:![0-9]+]]
; CHECK-NEXT: br label [[REGION_GUARDED_END:%.*]]
; CHECK: region.guarded.end:
; CHECK-NEXT: br label [[REGION_BARRIER]]
@@ -127,12 +127,10 @@ attributes #5 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "st
attributes #6 = { convergent nounwind "llvm.assume"="ompx_spmd_amenable" }
!omp_offload.info = !{!0}
-!nvvm.annotations = !{!1}
!llvm.module.flags = !{!2, !3, !4, !5, !6}
!llvm.ident = !{!7}
!0 = !{i32 0, i32 64770, i32 1078211522, !"main", i32 5, i32 0}
-!1 = !{ptr @__omp_offloading_fd02_404433c2_main_l5, !"kernel", i32 1}
!2 = !{i32 1, !"wchar_size", i32 4}
!3 = !{i32 7, !"openmp", i32 50}
!4 = !{i32 7, !"openmp-device", i32 50}
@@ -154,15 +152,14 @@ attributes #6 = { convergent nounwind "llvm.assume"="ompx_spmd_amenable" }
; CHECK: attributes #[[ATTR7]] = { convergent nounwind "llvm.assume"="ompx_spmd_amenable" }
;.
; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 1078211522, !"main", i32 5, i32 0}
-; CHECK: [[META1:![0-9]+]] = !{ptr @__omp_offloading_fd02_404433c2_main_l5, !"kernel", i32 1}
-; CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK: [[META5:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; CHECK: [[META6:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; CHECK: [[META7:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-; CHECK: [[TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0}
-; CHECK: [[META9]] = !{!"double", [[META10:![0-9]+]], i64 0}
-; CHECK: [[META10]] = !{!"omnipotent char", [[META11:![0-9]+]], i64 0}
-; CHECK: [[META11]] = !{!"Simple C/C++ TBAA"}
+; CHECK: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; CHECK: [[META2:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; CHECK: [[META4:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
+; CHECK: [[META5:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
+; CHECK: [[META6:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+; CHECK: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0}
+; CHECK: [[META8]] = !{!"double", [[META9:![0-9]+]], i64 0}
+; CHECK: [[META9]] = !{!"omnipotent char", [[META10:![0-9]+]], i64 0}
+; CHECK: [[META10]] = !{!"Simple C/C++ TBAA"}
;.
diff --git a/llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll b/llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll
index 75e01f3295fe23..953ecb2ddd8a66 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll
@@ -297,12 +297,10 @@ attributes #14 = { convergent nounwind "llvm.assume"="ompx_aligned_barrier,ompx_
attributes #15 = { convergent nounwind }
!omp_offload.info = !{!0}
-!nvvm.annotations = !{!1}
!llvm.module.flags = !{!2, !3, !4, !5}
!llvm.ident = !{!6}
!0 = !{i32 0, i32 32, i32 18757968, !"main", i32 12, i32 0}
-!1 = !{ptr @__omp_offloading_20_11e3950_main_l12, !"kernel", i32 1}
!2 = !{i32 1, !"wchar_size", i32 4}
!3 = !{i32 7, !"openmp", i32 50}
!4 = !{i32 7, !"openmp-device", i32 50}
diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
index 229a49d7845593..bbf1de253de92a 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
@@ -58,7 +58,7 @@ target triple = "nvptx64"
; CHECK-DISABLED: @__omp_offloading_2a_fbfa7a_sequential_loop_l6_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; CHECK-DISABLED: @__omp_outlined__1_wrapper.ID = private constant i8 undef
;.
-define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %dyn, ptr %x, i64 %N) #0 {
+define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %dyn, ptr %x, i64 %N) #0 {
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2a_fbfa7a_sequential_loop_l6
; CHECK-SAME: (ptr [[DYN:%.*]], ptr [[X:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
@@ -84,9 +84,9 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %dyn, ptr %x
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
; CHECK: region.guarded:
-; CHECK-NEXT: store i32 0, ptr [[X]], align 4, !noalias [[META8:![0-9]+]]
-; CHECK-NEXT: store i32 1, ptr [[ARRAYIDX1_I]], align 4, !noalias [[META8]]
-; CHECK-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr [[ARRAYIDX2_I]], align 4, !noalias [[META8]]
+; CHECK-NEXT: store i32 0, ptr [[X]], align 4, !noalias [[META7:![0-9]+]]
+; CHECK-NEXT: store i32 1, ptr [[ARRAYIDX1_I]], align 4, !noalias [[META7]]
+; CHECK-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr [[ARRAYIDX2_I]], align 4, !noalias [[META7]]
; CHECK-NEXT: br label [[REGION_GUARDED_END:%.*]]
; CHECK: region.guarded.end:
; CHECK-NEXT: br label [[REGION_BARRIER]]
@@ -111,7 +111,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %dyn, ptr %x
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
; CHECK-NEXT: br i1 [[TMP5]], label [[REGION_GUARDED4:%.*]], label [[REGION_BARRIER2:%.*]]
; CHECK: region.guarded4:
-; CHECK-NEXT: store i32 [[SUB3_I]], ptr [[ARRAYIDX5_I]], align 4, !noalias [[META8]]
+; CHECK-NEXT: store i32 [[SUB3_I]], ptr [[ARRAYIDX5_I]], align 4, !noalias [[META7]]
; CHECK-NEXT: br label [[REGION_GUARDED_END1:%.*]]
; CHECK: region.guarded.end1:
; CHECK-NEXT: br label [[REGION_BARRIER2]]
@@ -120,10 +120,10 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %dyn, ptr %x
; CHECK-NEXT: br label [[REGION_EXIT3]]
; CHECK: region.exit3:
; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1
-; CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: __omp_outlined__.exit:
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr null, i32 0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr null, i64 0)
-; CHECK-NEXT: [[CALL_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10:[0-9]+]], !noalias [[META8]]
+; CHECK-NEXT: [[CALL_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10:[0-9]+]], !noalias [[META7]]
; CHECK-NEXT: [[IDXPROM6_I:%.*]] = sext i32 [[CALL_I]] to i64
; CHECK-NEXT: [[ARRAYIDX7_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM6_I]]
; CHECK-NEXT: br label [[REGION_CHECK_TID10:%.*]]
@@ -132,7 +132,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %dyn, ptr %x
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0
; CHECK-NEXT: br i1 [[TMP7]], label [[REGION_GUARDED9:%.*]], label [[REGION_BARRIER7:%.*]]
; CHECK: region.guarded9:
-; CHECK-NEXT: store i32 [[CALL_I]], ptr [[ARRAYIDX7_I]], align 4, !noalias [[META8]]
+; CHECK-NEXT: store i32 [[CALL_I]], ptr [[ARRAYIDX7_I]], align 4, !noalias [[META7]]
; CHECK-NEXT: br label [[REGION_GUARDED_END6:%.*]]
; CHECK: region.guarded.end6:
; CHECK-NEXT: br label [[REGION_BARRIER7]]
@@ -140,7 +140,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %dyn, ptr %x
; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP6]])
; CHECK-NEXT: br label [[REGION_EXIT8:%.*]]
; CHECK: region.exit8:
-; CHECK-NEXT: [[CALL8_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META8]]
+; CHECK-NEXT: [[CALL8_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
; CHECK-NEXT: [[IDXPROM9_I:%.*]] = sext i32 [[CALL8_I]] to i64
; CHECK-NEXT: [[ARRAYIDX10_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM9_I]]
; CHECK-NEXT: br label [[REGION_CHECK_TID15:%.*]]
@@ -149,7 +149,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %dyn, ptr %x
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
; CHECK-NEXT: br i1 [[TMP9]], label [[REGION_GUARDED14:%.*]], label [[REGION_BARRIER12:%.*]]
; CHECK: region.guarded14:
-; CHECK-NEXT: store i32 [[CALL8_I]], ptr [[ARRAYIDX10_I]], align 4, !noalias [[META8]]
+; CHECK-NEXT: store i32 [[CALL8_I]], ptr [[ARRAYIDX10_I]], align 4, !noalias [[META7]]
; CHECK-NEXT: br label [[REGION_GUARDED_END11:%.*]]
; CHECK: region.guarded.end11:
; CHECK-NEXT: br label [[REGION_BARRIER12]]
@@ -157,7 +157,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %dyn, ptr %x
; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP8]])
; CHECK-NEXT: br label [[REGION_EXIT13:%.*]]
; CHECK: region.exit13:
-; CHECK-NEXT: [[CALL11_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META8]]
+; CHECK-NEXT: [[CALL11_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
; CHECK-NEXT: [[IDXPROM12_I:%.*]] = sext i32 [[CALL11_I]] to i64
; CHECK-NEXT: [[ARRAYIDX13_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM12_I]]
; CHECK-NEXT: br label [[REGION_CHECK_TID20:%.*]]
@@ -166,7 +166,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %dyn, ptr %x
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 0
; CHECK-NEXT: br i1 [[TMP11]], label [[REGION_GUARDED19:%.*]], label [[REGION_BARRIER17:%.*]]
; CHECK: region.guarded19:
-; CHECK-NEXT: store i32 [[CALL11_I]], ptr [[ARRAYIDX13_I]], align 4, !noalias [[META8]]
+; CHECK-NEXT: store i32 [[CALL11_I]], ptr [[ARRAYIDX13_I]], align 4, !noalias [[META7]]
; CHECK-NEXT: br label [[REGION_GUARDED_END16:%.*]]
; CHECK: region.guarded.end16:
; CHECK-NEXT: br label [[REGION_BARRIER17]]
@@ -174,9 +174,9 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %dyn, ptr %x
; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP10]])
; CHECK-NEXT: br label [[REGION_EXIT18:%.*]]
; CHECK: region.exit18:
-; CHECK-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META8]]
-; CHECK-NEXT: [[CALL15_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META8]]
-; CHECK-NEXT: [[CALL16_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META8]]
+; CHECK-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
+; CHECK-NEXT: [[CALL15_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
+; CHECK-NEXT: [[CALL16_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
; CHECK-NEXT: call void @__kmpc_target_deinit() #[[ATTR6]]
; CHECK-NEXT: ret void
; CHECK: worker.exit:
@@ -230,13 +230,13 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %dyn, ptr %x
; CHECK-DISABLED-NEXT: [[SELECT:%.*]] = select i1 [[C]], ptr [[AL32]], ptr addrspacecast (ptr addrspace(5) @LocGlob to ptr)
; CHECK-DISABLED-NEXT: store ptr [[SELECT]], ptr [[LOC]], align 8
; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR6]]
-; CHECK-DISABLED-NEXT: store i32 0, ptr [[X]], align 4, !noalias [[META8:![0-9]+]]
+; CHECK-DISABLED-NEXT: store i32 0, ptr [[X]], align 4, !noalias [[META7:![0-9]+]]
; CHECK-DISABLED-NEXT: [[ARRAYIDX1_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 1
-; CHECK-DISABLED-NEXT: store i32 1, ptr [[ARRAYIDX1_I]], align 4, !noalias [[META8]]
+; CHECK-DISABLED-NEXT: store i32 1, ptr [[ARRAYIDX1_I]], align 4, !noalias [[META7]]
; CHECK-DISABLED-NEXT: [[SEXT:%.*]] = shl i64 [[N]], 32
; CHECK-DISABLED-NEXT: [[IDXPROM_I:%.*]] = ashr exact i64 [[SEXT]], 32
; CHECK-DISABLED-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM_I]]
-; CHECK-DISABLED-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr [[ARRAYIDX2_I]], align 4, !noalias [[META8]]
+; CHECK-DISABLED-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr [[ARRAYIDX2_I]], align 4, !noalias [[META7]]
; CHECK-DISABLED-NEXT: call void @usei8ptr(ptr nocapture [[HEAP2STACK_H2S]]) #[[ATTR9:[0-9]+]]
; CHECK-DISABLED-NEXT: br label [[FOR_COND_I:%.*]]
; CHECK-DISABLED: for.cond.i:
@@ -248,26 +248,26 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %dyn, ptr %x
; CHECK-DISABLED-NEXT: [[SUB3_I:%.*]] = add nsw i32 [[I_0_I]], -1
; CHECK-DISABLED-NEXT: [[IDXPROM4_I:%.*]] = zext i32 [[I_0_I]] to i64
; CHECK-DISABLED-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM4_I]]
-; CHECK-DISABLED-NEXT: store i32 [[SUB3_I]], ptr [[ARRAYIDX5_I]], align 4, !noalias [[META8]]
+; CHECK-DISABLED-NEXT: store i32 [[SUB3_I]], ptr [[ARRAYIDX5_I]], align 4, !noalias [[META7]]
; CHECK-DISABLED-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1
-; CHECK-DISABLED-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-DISABLED-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK-DISABLED: __omp_outlined__.exit:
; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr null, i32 0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr null, i64 0)
-; CHECK-DISABLED-NEXT: [[CALL_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10:[0-9]+]], !noalias [[META8]]
+; CHECK-DISABLED-NEXT: [[CALL_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10:[0-9]+]], !noalias [[META7]]
; CHECK-DISABLED-NEXT: [[IDXPROM6_I:%.*]] = sext i32 [[CALL_I]] to i64
; CHECK-DISABLED-NEXT: [[ARRAYIDX7_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM6_I]]
-; CHECK-DISABLED-NEXT: store i32 [[CALL_I]], ptr [[ARRAYIDX7_I]], align 4, !noalias [[META8]]
-; CHECK-DISABLED-NEXT: [[CALL8_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META8]]
+; CHECK-DISABLED-NEXT: store i32 [[CALL_I]], ptr [[ARRAYIDX7_I]], align 4, !noalias [[META7]]
+; CHECK-DISABLED-NEXT: [[CALL8_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
; CHECK-DISABLED-NEXT: [[IDXPROM9_I:%.*]] = sext i32 [[CALL8_I]] to i64
; CHECK-DISABLED-NEXT: [[ARRAYIDX10_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM9_I]]
-; CHECK-DISABLED-NEXT: store i32 [[CALL8_I]], ptr [[ARRAYIDX10_I]], align 4, !noalias [[META8]]
-; CHECK-DISABLED-NEXT: [[CALL11_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META8]]
+; CHECK-DISABLED-NEXT: store i32 [[CALL8_I]], ptr [[ARRAYIDX10_I]], align 4, !noalias [[META7]]
+; CHECK-DISABLED-NEXT: [[CALL11_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
; CHECK-DISABLED-NEXT: [[IDXPROM12_I:%.*]] = sext i32 [[CALL11_I]] to i64
; CHECK-DISABLED-NEXT: [[ARRAYIDX13_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM12_I]]
-; CHECK-DISABLED-NEXT: store i32 [[CALL11_I]], ptr [[ARRAYIDX13_I]], align 4, !noalias [[META8]]
-; CHECK-DISABLED-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META8]]
-; CHECK-DISABLED-NEXT: [[CALL15_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META8]]
-; CHECK-DISABLED-NEXT: [[CALL16_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META8]]
+; CHECK-DISABLED-NEXT: store i32 [[CALL11_I]], ptr [[ARRAYIDX13_I]], align 4, !noalias [[META7]]
+; CHECK-DISABLED-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
+; CHECK-DISABLED-NEXT: [[CALL15_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
+; CHECK-DISABLED-NEXT: [[CALL16_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit() #[[ATTR6]]
; CHECK-DISABLED-NEXT: ret void
; CHECK-DISABLED: worker.exit:
@@ -404,12 +404,10 @@ attributes #4 = { inaccessiblememonly nofree nosync nounwind willreturn }
attributes #5 = { convergent nounwind "llvm.assume"="omp_no_openmp,ompx_spmd_amenable" }
!omp_offload.info = !{!0}
-!nvvm.annotations = !{!1}
!llvm.module.flags = !{!2, !3, !4, !5, !6}
!llvm.ident = !{!7}
!0 = !{i32 0, i32 42, i32 16513658, !"sequential_loop", i32 6, i32 0}
-!1 = !{ptr @__omp_offloading_2a_fbfa7a_sequential_loop_l6, !"kernel", i32 1}
!2 = !{i32 1, !"wchar_size", i32 4}
!3 = !{i32 7, !"openmp", i32 50}
!4 = !{i32 7, !"openmp-device", i32 50}
@@ -447,30 +445,28 @@ attributes #5 = { convergent nounwind "llvm.assume"="omp_no_openmp,ompx_spmd_ame
; CHECK-DISABLED: attributes #[[ATTR10]] = { convergent nounwind "llvm.assume"="omp_no_openmp,ompx_spmd_amenable" }
;.
; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 42, i32 16513658, !"sequential_loop", i32 6, i32 0}
-; CHECK: [[META1:![0-9]+]] = !{ptr @__omp_offloading_2a_fbfa7a_sequential_loop_l6, !"kernel", i32 1}
-; CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK: [[META5:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; CHECK: [[META6:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; CHECK: [[META7:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-; CHECK: [[META8]] = !{[[META9:![0-9]+]]}
-; CHECK: [[META9]] = distinct !{[[META9]], [[META10:![0-9]+]], !"__omp_outlined__: %__context"}
-; CHECK: [[META10]] = distinct !{[[META10]], !"__omp_outlined__"}
-; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META12:![0-9]+]]}
-; CHECK: [[META12]] = !{!"llvm.loop.mustprogress"}
+; CHECK: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; CHECK: [[META2:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; CHECK: [[META4:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
+; CHECK: [[META5:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
+; CHECK: [[META6:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+; CHECK: [[META7]] = !{[[META8:![0-9]+]]}
+; CHECK: [[META8]] = distinct !{[[META8]], [[META9:![0-9]+]], !"__omp_outlined__: %__context"}
+; CHECK: [[META9]] = distinct !{[[META9]], !"__omp_outlined__"}
+; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META11:![0-9]+]]}
+; CHECK: [[META11]] = !{!"llvm.loop.mustprogress"}
;.
; CHECK-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 42, i32 16513658, !"sequential_loop", i32 6, i32 0}
-; CHECK-DISABLED: [[META1:![0-9]+]] = !{ptr @__omp_offloading_2a_fbfa7a_sequential_loop_l6, !"kernel", i32 1}
-; CHECK-DISABLED: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; CHECK-DISABLED: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; CHECK-DISABLED: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK-DISABLED: [[META5:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; CHECK-DISABLED: [[META6:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; CHECK-DISABLED: [[META7:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-; CHECK-DISABLED: [[META8]] = !{[[META9:![0-9]+]]}
-; CHECK-DISABLED: [[META9]] = distinct !{[[META9]], [[META10:![0-9]+]], !"__omp_outlined__: %__context"}
-; CHECK-DISABLED: [[META10]] = distinct !{[[META10]], !"__omp_outlined__"}
-; CHECK-DISABLED: [[LOOP11]] = distinct !{[[LOOP11]], [[META12:![0-9]+]]}
-; CHECK-DISABLED: [[META12]] = !{!"llvm.loop.mustprogress"}
+; CHECK-DISABLED: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; CHECK-DISABLED: [[META2:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; CHECK-DISABLED: [[META3:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; CHECK-DISABLED: [[META4:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
+; CHECK-DISABLED: [[META5:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
+; CHECK-DISABLED: [[META6:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+; CHECK-DISABLED: [[META7]] = !{[[META8:![0-9]+]]}
+; CHECK-DISABLED: [[META8]] = distinct !{[[META8]], [[META9:![0-9]+]], !"__omp_outlined__: %__context"}
+; CHECK-DISABLED: [[META9]] = distinct !{[[META9]], !"__omp_outlined__"}
+; CHECK-DISABLED: [[LOOP10]] = distinct !{[[LOOP10]], [[META11:![0-9]+]]}
+; CHECK-DISABLED: [[META11]] = !{!"llvm.loop.mustprogress"}
;.
diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll
index 11405b7eb447c2..a644fe1b2f8217 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll
@@ -56,7 +56,7 @@ target triple = "nvptx64"
; CHECK-DISABLE-SPMDIZATION: @__omp_offloading_2b_10393b5_spmd_l12_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; CHECK-DISABLE-SPMDIZATION: @__omp_offloading_2b_10393b5_generic_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
;.
-define weak void @__omp_offloading_2b_10393b5_spmd_l12(ptr %dyn) "kernel" #0 {
+define weak ptx_kernel void @__omp_offloading_2b_10393b5_spmd_l12(ptr %dyn) "kernel" #0 {
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_spmd_l12
; CHECK-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
@@ -113,7 +113,7 @@ define weak i32 @__kmpc_target_init(ptr, ptr) {
declare void @__kmpc_target_deinit()
; Function Attrs: convergent noinline norecurse nounwind
-define weak void @__omp_offloading_2b_10393b5_generic_l20(ptr %dyn) #0 {
+define weak ptx_kernel void @__omp_offloading_2b_10393b5_generic_l20(ptr %dyn) #0 {
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_generic_l20
; CHECK-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
@@ -321,14 +321,12 @@ attributes #4 = { alwaysinline }
attributes #5 = { convergent }
!omp_offload.info = !{!0, !1}
-!nvvm.annotations = !{!2, !3}
!llvm.module.flags = !{!4, !5, !6, !7, !8}
!llvm.ident = !{!9}
!0 = !{i32 0, i32 43, i32 17011637, !"spmd", i32 12, i32 0}
!1 = !{i32 0, i32 43, i32 17011637, !"generic", i32 20, i32 1}
-!2 = !{ptr @__omp_offloading_2b_10393b5_spmd_l12, !"kernel", i32 1}
-!3 = !{ptr @__omp_offloading_2b_10393b5_generic_l20, !"kernel", i32 1}
+
!4 = !{i32 1, !"wchar_size", i32 4}
!5 = !{i32 7, !"openmp", i32 50}
!6 = !{i32 7, !"openmp-device", i32 50}
@@ -358,23 +356,19 @@ attributes #5 = { convergent }
;.
; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"spmd", i32 12, i32 0}
; CHECK: [[META1:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"generic", i32 20, i32 1}
-; CHECK: [[META2:![0-9]+]] = !{ptr @__omp_offloading_2b_10393b5_spmd_l12, !"kernel", i32 1}
-; CHECK: [[META3:![0-9]+]] = !{ptr @__omp_offloading_2b_10393b5_generic_l20, !"kernel", i32 1}
-; CHECK: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; CHECK: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; CHECK: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK: [[META7:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; CHECK: [[META8:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; CHECK: [[META9:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+; CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; CHECK: [[META5:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
+; CHECK: [[META6:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
+; CHECK: [[META7:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
;.
; CHECK-DISABLE-SPMDIZATION: [[META0:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"spmd", i32 12, i32 0}
; CHECK-DISABLE-SPMDIZATION: [[META1:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"generic", i32 20, i32 1}
-; CHECK-DISABLE-SPMDIZATION: [[META2:![0-9]+]] = !{ptr @__omp_offloading_2b_10393b5_spmd_l12, !"kernel", i32 1}
-; CHECK-DISABLE-SPMDIZATION: [[META3:![0-9]+]] = !{ptr @__omp_offloading_2b_10393b5_generic_l20, !"kernel", i32 1}
-; CHECK-DISABLE-SPMDIZATION: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; CHECK-DISABLE-SPMDIZATION: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; CHECK-DISABLE-SPMDIZATION: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK-DISABLE-SPMDIZATION: [[META7:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; CHECK-DISABLE-SPMDIZATION: [[META8:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; CHECK-DISABLE-SPMDIZATION: [[META9:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+; CHECK-DISABLE-SPMDIZATION: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; CHECK-DISABLE-SPMDIZATION: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; CHECK-DISABLE-SPMDIZATION: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; CHECK-DISABLE-SPMDIZATION: [[META5:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
+; CHECK-DISABLE-SPMDIZATION: [[META6:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
+; CHECK-DISABLE-SPMDIZATION: [[META7:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
;.
diff --git a/llvm/test/Transforms/OpenMP/spmdization_indirect.ll b/llvm/test/Transforms/OpenMP/spmdization_indirect.ll
index f348825446c63d..6dfc14e9270ed2 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_indirect.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_indirect.ll
@@ -28,7 +28,7 @@
; NVPTX: @spmd_and_non_spmd_callees_metadata_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; NVPTX: @spmd_and_non_spmd_callee_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
;.
-define weak void @spmd_callees(i1 %c) #0 {
+define weak ptx_kernel void @spmd_callees(i1 %c) #0 {
; AMDGPU-LABEL: define {{[^@]+}}@spmd_callees
; AMDGPU-SAME: (i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] {
; AMDGPU-NEXT: call void @spmd_callees__debug(i1 [[C]])
@@ -57,7 +57,7 @@ define internal void @spmd_callees__debug(i1 %c) {
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10:[0-9]+]]
; AMDGPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA17:![0-9]+]]
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
; AMDGPU-NEXT: [[FP:%.*]] = select i1 [[C]], ptr @__omp_outlined_spmd_amenable1, ptr @__omp_outlined_spmd_amenable2
; AMDGPU-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_spmd_amenable2
; AMDGPU-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
@@ -88,7 +88,7 @@ define internal void @spmd_callees__debug(i1 %c) {
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10:[0-9]+]]
; NVPTX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA17:![0-9]+]]
+; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
; NVPTX-NEXT: [[FP:%.*]] = select i1 [[C]], ptr @__omp_outlined_spmd_amenable1, ptr @__omp_outlined_spmd_amenable2
; NVPTX-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_spmd_amenable2
; NVPTX-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
@@ -143,10 +143,10 @@ define internal void @__omp_outlined_spmd_amenable1(ptr noalias %.global_tid., p
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR6:[0-9]+]]
; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA17]]
+; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]]
+; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined_spmd_amenable1
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
@@ -161,10 +161,10 @@ define internal void @__omp_outlined_spmd_amenable1(ptr noalias %.global_tid., p
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR6:[0-9]+]]
; NVPTX-NEXT: ret void
; NVPTX: for.body:
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA17]]
+; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]]
+; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
entry:
%captured_vars_addrs = alloca [0 x ptr], align 8
@@ -262,10 +262,10 @@ define internal void @__omp_outlined_spmd_amenable2(ptr noalias %.global_tid., p
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR6]]
; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA17]]
+; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]]
+; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined_spmd_amenable2
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
@@ -282,10 +282,10 @@ define internal void @__omp_outlined_spmd_amenable2(ptr noalias %.global_tid., p
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR6]]
; NVPTX-NEXT: ret void
; NVPTX: for.body:
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA17]]
+; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]]
+; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
entry:
%captured_vars_addrs = alloca [0 x ptr], align 8
@@ -367,7 +367,7 @@ entry:
; Function Attrs: alwaysinline convergent norecurse nounwind
-define weak void @spmd_and_non_spmd_callee(i1 %c) #0 {
+define weak ptx_kernel void @spmd_and_non_spmd_callee(i1 %c) #0 {
;
;
; AMDGPU-LABEL: define {{[^@]+}}@spmd_and_non_spmd_callee
@@ -413,7 +413,7 @@ define weak void @spmd_and_non_spmd_callee(i1 %c) #0 {
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]]
; AMDGPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA17]]
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: [[FP:%.*]] = select i1 [[C]], ptr @__omp_outlined_spmd_amenable3, ptr @__omp_outlined_not_spmd_amenable
; AMDGPU-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_not_spmd_amenable
; AMDGPU-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
@@ -473,7 +473,7 @@ define weak void @spmd_and_non_spmd_callee(i1 %c) #0 {
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]]
; NVPTX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA17]]
+; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: [[FP:%.*]] = select i1 [[C]], ptr @__omp_outlined_spmd_amenable3, ptr @__omp_outlined_not_spmd_amenable
; NVPTX-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_not_spmd_amenable
; NVPTX-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
@@ -530,11 +530,11 @@ define internal void @__omp_outlined_spmd_amenable3(ptr noalias %.global_tid., p
; AMDGPU-NEXT: call void @__kmpc_free_shared(ptr [[X]], i64 4) #[[ATTR10]]
; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
-; AMDGPU-NEXT: store ptr [[X]], ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA25:![0-9]+]]
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA17]]
+; AMDGPU-NEXT: store ptr [[X]], ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
+; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]]
+; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined_spmd_amenable3
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
@@ -551,11 +551,11 @@ define internal void @__omp_outlined_spmd_amenable3(ptr noalias %.global_tid., p
; NVPTX-NEXT: call void @__kmpc_free_shared(ptr [[X]], i64 4) #[[ATTR10]]
; NVPTX-NEXT: ret void
; NVPTX: for.body:
-; NVPTX-NEXT: store ptr [[X]], ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA25:![0-9]+]]
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA17]]
+; NVPTX-NEXT: store ptr [[X]], ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
+; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]]
+; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
entry:
%captured_vars_addrs = alloca [1 x ptr], align 8
@@ -587,18 +587,18 @@ define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA17]]
+; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA17]]
+; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @unknown() #[[ATTR7]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA17]]
+; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA17]]
+; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @unknown() #[[ATTR7]]
; NVPTX-NEXT: ret void
;
@@ -622,7 +622,7 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA25]]
+; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; AMDGPU-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR10]]
; AMDGPU-NEXT: ret void
;
@@ -634,7 +634,7 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA25]]
+; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; NVPTX-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR10]]
; NVPTX-NEXT: ret void
;
@@ -652,7 +652,7 @@ entry:
}
; Function Attrs: alwaysinline convergent norecurse nounwind
-define weak void @spmd_callees_metadata(ptr %fp) #0 {
+define weak ptx_kernel void @spmd_callees_metadata(ptr %fp) #0 {
;
;
; AMDGPU-LABEL: define {{[^@]+}}@spmd_callees_metadata
@@ -668,7 +668,7 @@ define weak void @spmd_callees_metadata(ptr %fp) #0 {
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]]
; AMDGPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA17]]
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @__omp_outlined_spmd_amenable_external(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]])
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: br label [[COMMON_RET]]
@@ -686,7 +686,7 @@ define weak void @spmd_callees_metadata(ptr %fp) #0 {
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]]
; NVPTX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA17]]
+; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @__omp_outlined_spmd_amenable_external(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]])
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: br label [[COMMON_RET]]
@@ -711,7 +711,7 @@ user_code.entry: ; preds = %entry
}
; Function Attrs: alwaysinline convergent norecurse nounwind
-define weak void @spmd_and_non_spmd_callees_metadata(ptr %fp) #0 {
+define weak ptx_kernel void @spmd_and_non_spmd_callees_metadata(ptr %fp) #0 {
;
;
; AMDGPU-LABEL: define {{[^@]+}}@spmd_and_non_spmd_callees_metadata
@@ -757,7 +757,7 @@ define weak void @spmd_and_non_spmd_callees_metadata(ptr %fp) #0 {
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]]
; AMDGPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA17]]
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_spmd_amenable_external
; AMDGPU-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
; AMDGPU: 3:
@@ -816,7 +816,7 @@ define weak void @spmd_and_non_spmd_callees_metadata(ptr %fp) #0 {
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]]
; NVPTX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA17]]
+; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_spmd_amenable_external
; NVPTX-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
; NVPTX: 3:
@@ -868,10 +868,10 @@ define void @__omp_outlined_spmd_amenable_external(ptr noalias %.global_tid., pt
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR6]]
; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA17]]
+; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr undef, i64 0)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
+; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined_spmd_amenable_external
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
@@ -885,10 +885,10 @@ define void @__omp_outlined_spmd_amenable_external(ptr noalias %.global_tid., pt
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR6]]
; NVPTX-NEXT: ret void
; NVPTX: for.body:
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA17]]
+; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr undef, i64 0)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
+; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
entry:
br label %for.cond
@@ -1069,7 +1069,6 @@ attributes #10 = { convergent "llvm.assume"="ompx_spmd_amenable" }
attributes #11 = { convergent }
!omp_offload.info = !{!0, !1, !2, !3, !4, !5}
-!nvvm.annotations = !{!6, !7, !8, !9, !10, !11}
!llvm.module.flags = !{!12, !13, !14, !15, !16}
!llvm.ident = !{!17}
@@ -1079,12 +1078,6 @@ attributes #11 = { convergent }
!3 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
!4 = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
!5 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
-!6 = !{ptr @spmd_callees, !"kernel", i32 1}
-!7 = !{ptr @spmd_and_non_spmd_callees_metadata, !"kernel", i32 1}
-!8 = !{ptr @spmd_and_non_spmd_callee, !"kernel", i32 1}
-!9 = !{ptr @spmd_callees_metadata, !"kernel", i32 1}
-!10 = !{i32 1}
-!11 = !{i32 1}
!12 = !{i32 1, !"wchar_size", i32 4}
!13 = !{i32 7, !"openmp", i32 50}
!14 = !{i32 7, !"openmp-device", i32 50}
@@ -1139,29 +1132,24 @@ attributes #11 = { convergent }
; AMDGPU: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; AMDGPU: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; AMDGPU: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
-; AMDGPU: [[META6:![0-9]+]] = !{ptr @spmd_callees, !"kernel", i32 1}
-; AMDGPU: [[META7:![0-9]+]] = !{ptr @spmd_and_non_spmd_callees_metadata, !"kernel", i32 1}
-; AMDGPU: [[META8:![0-9]+]] = !{ptr @spmd_and_non_spmd_callee, !"kernel", i32 1}
-; AMDGPU: [[META9:![0-9]+]] = !{ptr @spmd_callees_metadata, !"kernel", i32 1}
-; AMDGPU: [[META10:![0-9]+]] = !{i32 1}
-; AMDGPU: [[META11:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; AMDGPU: [[META12:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; AMDGPU: [[META13:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; AMDGPU: [[META14:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; AMDGPU: [[META15:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; AMDGPU: [[META16:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-; AMDGPU: [[TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0}
-; AMDGPU: [[META18]] = !{!"int", [[META19:![0-9]+]], i64 0}
-; AMDGPU: [[META19]] = !{!"omnipotent char", [[META20:![0-9]+]], i64 0}
-; AMDGPU: [[META20]] = !{!"Simple C/C++ TBAA"}
-; AMDGPU: [[LOOP21]] = distinct !{[[LOOP21]], [[META22:![0-9]+]], [[META23:![0-9]+]]}
-; AMDGPU: [[META22]] = !{!"llvm.loop.mustprogress"}
-; AMDGPU: [[META23]] = !{!"llvm.loop.unroll.disable"}
-; AMDGPU: [[LOOP24]] = distinct !{[[LOOP24]], [[META22]], [[META23]]}
-; AMDGPU: [[TBAA25]] = !{[[META26:![0-9]+]], [[META26]], i64 0}
-; AMDGPU: [[META26]] = !{!"any pointer", [[META19]], i64 0}
-; AMDGPU: [[LOOP27]] = distinct !{[[LOOP27]], [[META22]], [[META23]]}
-; AMDGPU: [[LOOP28]] = distinct !{[[LOOP28]], [[META22]], [[META23]]}
+; AMDGPU: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; AMDGPU: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; AMDGPU: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; AMDGPU: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
+; AMDGPU: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
+; AMDGPU: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+; AMDGPU: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
+; AMDGPU: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
+; AMDGPU: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
+; AMDGPU: [[META15]] = !{!"Simple C/C++ TBAA"}
+; AMDGPU: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
+; AMDGPU: [[META17]] = !{!"llvm.loop.mustprogress"}
+; AMDGPU: [[META18]] = !{!"llvm.loop.unroll.disable"}
+; AMDGPU: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
+; AMDGPU: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
+; AMDGPU: [[META21]] = !{!"any pointer", [[META14]], i64 0}
+; AMDGPU: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
+; AMDGPU: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
;.
; NVPTX: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"", i32 74, i32 5}
; NVPTX: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
@@ -1169,27 +1157,22 @@ attributes #11 = { convergent }
; NVPTX: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; NVPTX: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; NVPTX: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
-; NVPTX: [[META6:![0-9]+]] = !{ptr @spmd_callees, !"kernel", i32 1}
-; NVPTX: [[META7:![0-9]+]] = !{ptr @spmd_and_non_spmd_callees_metadata, !"kernel", i32 1}
-; NVPTX: [[META8:![0-9]+]] = !{ptr @spmd_and_non_spmd_callee, !"kernel", i32 1}
-; NVPTX: [[META9:![0-9]+]] = !{ptr @spmd_callees_metadata, !"kernel", i32 1}
-; NVPTX: [[META10:![0-9]+]] = !{i32 1}
-; NVPTX: [[META11:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; NVPTX: [[META12:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; NVPTX: [[META13:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; NVPTX: [[META14:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; NVPTX: [[META15:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; NVPTX: [[META16:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-; NVPTX: [[TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0}
-; NVPTX: [[META18]] = !{!"int", [[META19:![0-9]+]], i64 0}
-; NVPTX: [[META19]] = !{!"omnipotent char", [[META20:![0-9]+]], i64 0}
-; NVPTX: [[META20]] = !{!"Simple C/C++ TBAA"}
-; NVPTX: [[LOOP21]] = distinct !{[[LOOP21]], [[META22:![0-9]+]], [[META23:![0-9]+]]}
-; NVPTX: [[META22]] = !{!"llvm.loop.mustprogress"}
-; NVPTX: [[META23]] = !{!"llvm.loop.unroll.disable"}
-; NVPTX: [[LOOP24]] = distinct !{[[LOOP24]], [[META22]], [[META23]]}
-; NVPTX: [[TBAA25]] = !{[[META26:![0-9]+]], [[META26]], i64 0}
-; NVPTX: [[META26]] = !{!"any pointer", [[META19]], i64 0}
-; NVPTX: [[LOOP27]] = distinct !{[[LOOP27]], [[META22]], [[META23]]}
-; NVPTX: [[LOOP28]] = distinct !{[[LOOP28]], [[META22]], [[META23]]}
+; NVPTX: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; NVPTX: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; NVPTX: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; NVPTX: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
+; NVPTX: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
+; NVPTX: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+; NVPTX: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
+; NVPTX: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
+; NVPTX: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
+; NVPTX: [[META15]] = !{!"Simple C/C++ TBAA"}
+; NVPTX: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
+; NVPTX: [[META17]] = !{!"llvm.loop.mustprogress"}
+; NVPTX: [[META18]] = !{!"llvm.loop.unroll.disable"}
+; NVPTX: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
+; NVPTX: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
+; NVPTX: [[META21]] = !{!"any pointer", [[META14]], i64 0}
+; NVPTX: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
+; NVPTX: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
;.
diff --git a/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll b/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll
index f28f61e053275c..1cfce147ac81ec 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll
@@ -57,7 +57,7 @@ target triple = "nvptx64"
; CHECK-DISABLE-SPMDIZATION: @__omp_offloading_2b_10393b5_generic_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; CHECK-DISABLE-SPMDIZATION: @__omp_outlined___wrapper.ID = private constant i8 undef
;.
-define weak void @__omp_offloading_2b_10393b5_spmd_l12(ptr %dyn) #0 {
+define weak ptx_kernel void @__omp_offloading_2b_10393b5_spmd_l12(ptr %dyn) #0 {
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_spmd_l12
; CHECK-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
@@ -147,7 +147,7 @@ define weak i32 @__kmpc_target_init(ptr, ptr) {
declare void @__kmpc_target_deinit()
; Function Attrs: convergent noinline norecurse nounwind
-define weak void @__omp_offloading_2b_10393b5_generic_l20(ptr %dyn) #0 {
+define weak ptx_kernel void @__omp_offloading_2b_10393b5_generic_l20(ptr %dyn) #0 {
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_generic_l20
; CHECK-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
@@ -397,14 +397,11 @@ attributes #4 = { alwaysinline }
attributes #5 = { convergent }
!omp_offload.info = !{!0, !1}
-!nvvm.annotations = !{!2, !3}
!llvm.module.flags = !{!4, !5, !6, !7, !8}
!llvm.ident = !{!9}
!0 = !{i32 0, i32 43, i32 17011637, !"spmd", i32 12, i32 0}
!1 = !{i32 0, i32 43, i32 17011637, !"generic", i32 20, i32 1}
-!2 = !{ptr @__omp_offloading_2b_10393b5_spmd_l12, !"kernel", i32 1}
-!3 = !{ptr @__omp_offloading_2b_10393b5_generic_l20, !"kernel", i32 1}
!4 = !{i32 1, !"wchar_size", i32 4}
!5 = !{i32 7, !"openmp", i32 50}
!6 = !{i32 7, !"openmp-device", i32 50}
@@ -434,23 +431,19 @@ attributes #5 = { convergent }
;.
; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"spmd", i32 12, i32 0}
; CHECK: [[META1:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"generic", i32 20, i32 1}
-; CHECK: [[META2:![0-9]+]] = !{ptr @__omp_offloading_2b_10393b5_spmd_l12, !"kernel", i32 1}
-; CHECK: [[META3:![0-9]+]] = !{ptr @__omp_offloading_2b_10393b5_generic_l20, !"kernel", i32 1}
-; CHECK: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; CHECK: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; CHECK: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK: [[META7:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; CHECK: [[META8:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; CHECK: [[META9:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+; CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; CHECK: [[META5:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
+; CHECK: [[META6:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
+; CHECK: [[META7:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
;.
; CHECK-DISABLE-SPMDIZATION: [[META0:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"spmd", i32 12, i32 0}
; CHECK-DISABLE-SPMDIZATION: [[META1:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"generic", i32 20, i32 1}
-; CHECK-DISABLE-SPMDIZATION: [[META2:![0-9]+]] = !{ptr @__omp_offloading_2b_10393b5_spmd_l12, !"kernel", i32 1}
-; CHECK-DISABLE-SPMDIZATION: [[META3:![0-9]+]] = !{ptr @__omp_offloading_2b_10393b5_generic_l20, !"kernel", i32 1}
-; CHECK-DISABLE-SPMDIZATION: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; CHECK-DISABLE-SPMDIZATION: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; CHECK-DISABLE-SPMDIZATION: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK-DISABLE-SPMDIZATION: [[META7:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; CHECK-DISABLE-SPMDIZATION: [[META8:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; CHECK-DISABLE-SPMDIZATION: [[META9:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+; CHECK-DISABLE-SPMDIZATION: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; CHECK-DISABLE-SPMDIZATION: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; CHECK-DISABLE-SPMDIZATION: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; CHECK-DISABLE-SPMDIZATION: [[META5:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
+; CHECK-DISABLE-SPMDIZATION: [[META6:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
+; CHECK-DISABLE-SPMDIZATION: [[META7:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
;.
diff --git a/llvm/test/Transforms/OpenMP/spmdization_remarks.ll b/llvm/test/Transforms/OpenMP/spmdization_remarks.ll
index f5a4cea9a841cb..ef36937bc57348 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_remarks.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_remarks.ll
@@ -62,7 +62,7 @@ target triple = "nvptx64"
; Function Attrs: convergent norecurse nounwind
-define weak void @__omp_offloading_2a_d80d3d_test_fallback_l11(ptr %dyn) local_unnamed_addr #0 !dbg !15 {
+define weak ptx_kernel void @__omp_offloading_2a_d80d3d_test_fallback_l11(ptr %dyn) local_unnamed_addr #0 !dbg !15 {
entry:
%captured_vars_addrs.i.i = alloca [0 x ptr], align 8
%0 = call i32 @__kmpc_target_init(ptr nonnull @__omp_offloading_2a_d80d3d_test_fallback_l11_kernel_environment, ptr %dyn) #3, !dbg !18
@@ -107,7 +107,7 @@ declare i32 @__kmpc_global_thread_num(ptr) local_unnamed_addr #3
declare void @__kmpc_target_deinit() local_unnamed_addr
; Function Attrs: norecurse nounwind
-define weak void @__omp_offloading_2a_d80d3d_test_no_fallback_l20(ptr %dyn) local_unnamed_addr #4 !dbg !32 {
+define weak ptx_kernel void @__omp_offloading_2a_d80d3d_test_no_fallback_l20(ptr %dyn) local_unnamed_addr #4 !dbg !32 {
entry:
%captured_vars_addrs.i2.i = alloca [0 x ptr], align 8
%0 = call i32 @__kmpc_target_init(ptr nonnull @__omp_offloading_2a_d80d3d_test_no_fallback_l20_kernel_environment, ptr %dyn) #3, !dbg !33
@@ -175,7 +175,6 @@ attributes #7 = { "llvm.assume"="ompx_spmd_amenable" }
!llvm.dbg.cu = !{!0}
!omp_offload.info = !{!3, !4}
-!nvvm.annotations = !{!5, !6}
!llvm.module.flags = !{!7, !8, !9, !10, !11, !12, !13}
!llvm.ident = !{!14}
@@ -184,8 +183,6 @@ attributes #7 = { "llvm.assume"="ompx_spmd_amenable" }
!2 = !{}
!3 = !{i32 0, i32 42, i32 14159165, !"test_no_fallback", i32 20, i32 1}
!4 = !{i32 0, i32 42, i32 14159165, !"test_fallback", i32 11, i32 0}
-!5 = !{ptr @__omp_offloading_2a_d80d3d_test_fallback_l11, !"kernel", i32 1}
-!6 = !{ptr @__omp_offloading_2a_d80d3d_test_no_fallback_l20, !"kernel", i32 1}
!7 = !{i32 7, !"Dwarf Version", i32 2}
!8 = !{i32 2, !"Debug Info Version", i32 3}
!9 = !{i32 1, !"wchar_size", i32 4}
diff --git a/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
index 5e2abbae1811c6..2842dfd030b114 100644
--- a/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
+++ b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
@@ -53,7 +53,7 @@ target triple = "amdgcn-amd-amdhsa"
; CHECK: @str = private unnamed_addr addrspace(4) constant [1 x i8] zeroinitializer, align 1
; CHECK: @kernel_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr null, ptr null }
;.
-define void @kernel(ptr %dyn) "kernel" {
+define amdgpu_kernel void @kernel(ptr %dyn) "kernel" {
;
; TUNIT: Function Attrs: norecurse
; TUNIT-LABEL: define {{[^@]+}}@kernel
@@ -144,7 +144,7 @@ define void @test_assume() {
}
; We can't ignore the sync, hence this might store 2 into %p
-define void @kernel2(ptr %p) "kernel" {
+define amdgpu_kernel void @kernel2(ptr %p) "kernel" {
; CHECK-LABEL: define {{[^@]+}}@kernel2
; CHECK-SAME: (ptr [[P:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: store i32 1, ptr addrspace(3) @X, align 4
@@ -163,7 +163,7 @@ define void @kernel2(ptr %p) "kernel" {
}
; We can't ignore the sync, hence this might store 2 into %p
-define void @kernel3(ptr %p) "kernel" {
+define amdgpu_kernel void @kernel3(ptr %p) "kernel" {
; TUNIT-LABEL: define {{[^@]+}}@kernel3
; TUNIT-SAME: (ptr [[P:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT: store i32 1, ptr addrspace(3) @X, align 4
@@ -199,7 +199,7 @@ define void @sync_def() {
ret void
}
-define void @kernel4a1(i1 %c) "kernel" {
+define amdgpu_kernel void @kernel4a1(i1 %c) "kernel" {
; TUNIT-LABEL: define {{[^@]+}}@kernel4a1
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT: store i32 0, ptr addrspace(3) @QA1, align 4
@@ -242,7 +242,7 @@ S:
}
; We should not replace the load or delete the second store.
-define void @kernel4b1(i1 %c) "kernel" {
+define amdgpu_kernel void @kernel4b1(i1 %c) "kernel" {
; TUNIT-LABEL: define {{[^@]+}}@kernel4b1
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT: store i32 0, ptr addrspace(3) @QB1, align 4
@@ -281,7 +281,7 @@ S:
ret void
}
-define void @kernel4a2(i1 %c) "kernel" {
+define amdgpu_kernel void @kernel4a2(i1 %c) "kernel" {
; TUNIT-LABEL: define {{[^@]+}}@kernel4a2
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
@@ -317,7 +317,7 @@ S:
}
; FIXME: We should not replace the load with undef.
-define void @kernel4b2(i1 %c) "kernel" {
+define amdgpu_kernel void @kernel4b2(i1 %c) "kernel" {
; TUNIT-LABEL: define {{[^@]+}}@kernel4b2
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
@@ -349,7 +349,7 @@ S:
ret void
}
-define void @kernel4a3(i1 %c) "kernel" {
+define amdgpu_kernel void @kernel4a3(i1 %c) "kernel" {
; TUNIT-LABEL: define {{[^@]+}}@kernel4a3
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT: store i32 0, ptr addrspace(3) @QA3, align 4
@@ -401,7 +401,7 @@ S:
}
; The load of QB3 should not be simplified to 0.
-define void @kernel4b3(i1 %c) "kernel" {
+define amdgpu_kernel void @kernel4b3(i1 %c) "kernel" {
; TUNIT-LABEL: define {{[^@]+}}@kernel4b3
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT: store i32 0, ptr addrspace(3) @QB3, align 4
@@ -453,7 +453,7 @@ S:
}
-define void @kernel4c1(i1 %c) "kernel" {
+define amdgpu_kernel void @kernel4c1(i1 %c) "kernel" {
; TUNIT: Function Attrs: norecurse
; TUNIT-LABEL: define {{[^@]+}}@kernel4c1
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
@@ -488,7 +488,7 @@ S:
}
; We should not replace the load or delete the second store.
-define void @kernel4d1(i1 %c) "kernel" {
+define amdgpu_kernel void @kernel4d1(i1 %c) "kernel" {
; TUNIT: Function Attrs: norecurse
; TUNIT-LABEL: define {{[^@]+}}@kernel4d1
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
@@ -529,7 +529,7 @@ S:
ret void
}
-define void @kernel4c2(i1 %c) "kernel" {
+define amdgpu_kernel void @kernel4c2(i1 %c) "kernel" {
; TUNIT: Function Attrs: norecurse
; TUNIT-LABEL: define {{[^@]+}}@kernel4c2
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
@@ -563,7 +563,7 @@ S:
}
; We should not replace the load with undef.
-define void @kernel4d2(i1 %c) "kernel" {
+define amdgpu_kernel void @kernel4d2(i1 %c) "kernel" {
; TUNIT: Function Attrs: norecurse
; TUNIT-LABEL: define {{[^@]+}}@kernel4d2
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
@@ -595,7 +595,7 @@ S:
ret void
}
-define void @kernel4c3(i1 %c) "kernel" {
+define amdgpu_kernel void @kernel4c3(i1 %c) "kernel" {
; TUNIT: Function Attrs: norecurse
; TUNIT-LABEL: define {{[^@]+}}@kernel4c3
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
@@ -629,7 +629,7 @@ S:
}
; We should not replace the load with undef.
-define void @kernel4d3(i1 %c) "kernel" {
+define amdgpu_kernel void @kernel4d3(i1 %c) "kernel" {
; TUNIT: Function Attrs: norecurse
; TUNIT-LABEL: define {{[^@]+}}@kernel4d3
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
@@ -661,7 +661,7 @@ S:
ret void
}
-define void @kernel_unknown_and_aligned1(i1 %c) "kernel" {
+define amdgpu_kernel void @kernel_unknown_and_aligned1(i1 %c) "kernel" {
; TUNIT-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned1
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
@@ -700,7 +700,7 @@ S:
ret void
}
-define void @kernel_unknown_and_aligned2(i1 %c) "kernel" {
+define amdgpu_kernel void @kernel_unknown_and_aligned2(i1 %c) "kernel" {
; TUNIT-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned2
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
@@ -741,7 +741,7 @@ S:
ret void
}
-define void @kernel_unknown_and_aligned3(i1 %c) "kernel" {
+define amdgpu_kernel void @kernel_unknown_and_aligned3(i1 %c) "kernel" {
; TUNIT-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned3
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
@@ -782,7 +782,7 @@ S:
ret void
}
-define void @kernel_unknown_and_not_aligned1(i1 %c) "kernel" {
+define amdgpu_kernel void @kernel_unknown_and_not_aligned1(i1 %c) "kernel" {
; TUNIT-LABEL: define {{[^@]+}}@kernel_unknown_and_not_aligned1
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
@@ -828,29 +828,9 @@ declare void @__kmpc_target_deinit() nocallback
declare void @llvm.assume(i1)
!llvm.module.flags = !{!0, !1}
-!nvvm.annotations = !{!2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20}
!0 = !{i32 7, !"openmp", i32 50}
!1 = !{i32 7, !"openmp-device", i32 50}
-!2 = !{ptr @kernel, !"kernel", i32 1}
-!3 = !{ptr @kernel2, !"kernel", i32 1}
-!4 = !{ptr @kernel3, !"kernel", i32 1}
-!5 = !{ptr @kernel4a1, !"kernel", i32 1}
-!6 = !{ptr @kernel4b1, !"kernel", i32 1}
-!7 = !{ptr @kernel4a2, !"kernel", i32 1}
-!8 = !{ptr @kernel4b2, !"kernel", i32 1}
-!9 = !{ptr @kernel4a3, !"kernel", i32 1}
-!10 = !{ptr @kernel4b3, !"kernel", i32 1}
-!11 = !{ptr @kernel4c1, !"kernel", i32 1}
-!12 = !{ptr @kernel4d1, !"kernel", i32 1}
-!13 = !{ptr @kernel4c2, !"kernel", i32 1}
-!14 = !{ptr @kernel4d2, !"kernel", i32 1}
-!15 = !{ptr @kernel4c3, !"kernel", i32 1}
-!16 = !{ptr @kernel4d3, !"kernel", i32 1}
-!17 = !{ptr @kernel_unknown_and_aligned1, !"kernel", i32 1}
-!18 = !{ptr @kernel_unknown_and_aligned2, !"kernel", i32 1}
-!19 = !{ptr @kernel_unknown_and_aligned3, !"kernel", i32 1}
-!20 = !{ptr @kernel_unknown_and_not_aligned1, !"kernel", i32 1}
;.
; TUNIT: attributes #[[ATTR0]] = { norecurse "kernel" }
@@ -872,45 +852,7 @@ declare void @llvm.assume(i1)
;.
; TUNIT: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; TUNIT: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; TUNIT: [[META2:![0-9]+]] = !{ptr @kernel, !"kernel", i32 1}
-; TUNIT: [[META3:![0-9]+]] = !{ptr @kernel2, !"kernel", i32 1}
-; TUNIT: [[META4:![0-9]+]] = !{ptr @kernel3, !"kernel", i32 1}
-; TUNIT: [[META5:![0-9]+]] = !{ptr @kernel4a1, !"kernel", i32 1}
-; TUNIT: [[META6:![0-9]+]] = !{ptr @kernel4b1, !"kernel", i32 1}
-; TUNIT: [[META7:![0-9]+]] = !{ptr @kernel4a2, !"kernel", i32 1}
-; TUNIT: [[META8:![0-9]+]] = !{ptr @kernel4b2, !"kernel", i32 1}
-; TUNIT: [[META9:![0-9]+]] = !{ptr @kernel4a3, !"kernel", i32 1}
-; TUNIT: [[META10:![0-9]+]] = !{ptr @kernel4b3, !"kernel", i32 1}
-; TUNIT: [[META11:![0-9]+]] = !{ptr @kernel4c1, !"kernel", i32 1}
-; TUNIT: [[META12:![0-9]+]] = !{ptr @kernel4d1, !"kernel", i32 1}
-; TUNIT: [[META13:![0-9]+]] = !{ptr @kernel4c2, !"kernel", i32 1}
-; TUNIT: [[META14:![0-9]+]] = !{ptr @kernel4d2, !"kernel", i32 1}
-; TUNIT: [[META15:![0-9]+]] = !{ptr @kernel4c3, !"kernel", i32 1}
-; TUNIT: [[META16:![0-9]+]] = !{ptr @kernel4d3, !"kernel", i32 1}
-; TUNIT: [[META17:![0-9]+]] = !{ptr @kernel_unknown_and_aligned1, !"kernel", i32 1}
-; TUNIT: [[META18:![0-9]+]] = !{ptr @kernel_unknown_and_aligned2, !"kernel", i32 1}
-; TUNIT: [[META19:![0-9]+]] = !{ptr @kernel_unknown_and_aligned3, !"kernel", i32 1}
-; TUNIT: [[META20:![0-9]+]] = !{ptr @kernel_unknown_and_not_aligned1, !"kernel", i32 1}
;.
; CGSCC: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CGSCC: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CGSCC: [[META2:![0-9]+]] = !{ptr @kernel, !"kernel", i32 1}
-; CGSCC: [[META3:![0-9]+]] = !{ptr @kernel2, !"kernel", i32 1}
-; CGSCC: [[META4:![0-9]+]] = !{ptr @kernel3, !"kernel", i32 1}
-; CGSCC: [[META5:![0-9]+]] = !{ptr @kernel4a1, !"kernel", i32 1}
-; CGSCC: [[META6:![0-9]+]] = !{ptr @kernel4b1, !"kernel", i32 1}
-; CGSCC: [[META7:![0-9]+]] = !{ptr @kernel4a2, !"kernel", i32 1}
-; CGSCC: [[META8:![0-9]+]] = !{ptr @kernel4b2, !"kernel", i32 1}
-; CGSCC: [[META9:![0-9]+]] = !{ptr @kernel4a3, !"kernel", i32 1}
-; CGSCC: [[META10:![0-9]+]] = !{ptr @kernel4b3, !"kernel", i32 1}
-; CGSCC: [[META11:![0-9]+]] = !{ptr @kernel4c1, !"kernel", i32 1}
-; CGSCC: [[META12:![0-9]+]] = !{ptr @kernel4d1, !"kernel", i32 1}
-; CGSCC: [[META13:![0-9]+]] = !{ptr @kernel4c2, !"kernel", i32 1}
-; CGSCC: [[META14:![0-9]+]] = !{ptr @kernel4d2, !"kernel", i32 1}
-; CGSCC: [[META15:![0-9]+]] = !{ptr @kernel4c3, !"kernel", i32 1}
-; CGSCC: [[META16:![0-9]+]] = !{ptr @kernel4d3, !"kernel", i32 1}
-; CGSCC: [[META17:![0-9]+]] = !{ptr @kernel_unknown_and_aligned1, !"kernel", i32 1}
-; CGSCC: [[META18:![0-9]+]] = !{ptr @kernel_unknown_and_aligned2, !"kernel", i32 1}
-; CGSCC: [[META19:![0-9]+]] = !{ptr @kernel_unknown_and_aligned3, !"kernel", i32 1}
-; CGSCC: [[META20:![0-9]+]] = !{ptr @kernel_unknown_and_not_aligned1, !"kernel", i32 1}
;.
More information about the llvm-commits mailing list