[clang] [llvm] [NVPTX] Add NVVMUpgradeAnnotations pass to cleanup legacy annotations (PR #119261)
Alex MacLean via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 10 12:29:05 PST 2024
https://github.com/AlexMaclean updated https://github.com/llvm/llvm-project/pull/119261
>From f9f30a77f5e7232f968a3063c34338c9dfc7bac5 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Fri, 8 Nov 2024 22:39:34 +0000
Subject: [PATCH 1/5] [NVPTX] Add NVVMUpgradeAnnotations pass to cleanup legacy
annotations
---
llvm/lib/Target/NVPTX/CMakeLists.txt | 1 +
llvm/lib/Target/NVPTX/NVPTX.h | 5 +
llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 4 +
llvm/lib/Target/NVPTX/NVPTXUtilities.cpp | 9 +-
.../Target/NVPTX/NVVMUpgradeAnnotations.cpp | 130 ++++++++++++++++++
.../CodeGen/NVPTX/upgrade-nvvm-annotations.ll | 30 ++++
6 files changed, 177 insertions(+), 2 deletions(-)
create mode 100644 llvm/lib/Target/NVPTX/NVVMUpgradeAnnotations.cpp
create mode 100644 llvm/test/CodeGen/NVPTX/upgrade-nvvm-annotations.ll
diff --git a/llvm/lib/Target/NVPTX/CMakeLists.txt b/llvm/lib/Target/NVPTX/CMakeLists.txt
index 693365161330f5..bb2e4ad48b51d8 100644
--- a/llvm/lib/Target/NVPTX/CMakeLists.txt
+++ b/llvm/lib/Target/NVPTX/CMakeLists.txt
@@ -39,6 +39,7 @@ set(NVPTXCodeGen_sources
NVVMReflect.cpp
NVPTXProxyRegErasure.cpp
NVPTXCtorDtorLowering.cpp
+ NVVMUpgradeAnnotations.cpp
)
add_llvm_target(NVPTXCodeGen
diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
index ca915cd3f3732f..53418148be3615 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -52,6 +52,7 @@ FunctionPass *createNVPTXLowerUnreachablePass(bool TrapUnreachable,
bool NoTrapAfterNoreturn);
MachineFunctionPass *createNVPTXPeephole();
MachineFunctionPass *createNVPTXProxyRegErasurePass();
+ModulePass *createNVVMUpgradeAnnotationsPass();
struct NVVMIntrRangePass : PassInfoMixin<NVVMIntrRangePass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
@@ -74,6 +75,10 @@ struct NVPTXCopyByValArgsPass : PassInfoMixin<NVPTXCopyByValArgsPass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
+struct NVVMUpgradeAnnotationsPass : PassInfoMixin<NVVMUpgradeAnnotationsPass> {
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
namespace NVPTX {
enum DrvInterface {
NVCL,
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index a5c5e9420ee737..b4fd36625adc9c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -254,6 +254,8 @@ void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PB.registerPipelineStartEPCallback(
[this](ModulePassManager &PM, OptimizationLevel Level) {
+ PM.addPass(NVVMUpgradeAnnotationsPass());
+
FunctionPassManager FPM;
FPM.addPass(NVVMReflectPass(Subtarget.getSmVersion()));
// Note: NVVMIntrRangePass was causing numerical discrepancies at one
@@ -349,6 +351,8 @@ void NVPTXPassConfig::addIRPasses() {
AAR.addAAResult(WrapperPass->getResult());
}));
+ addPass(createNVVMUpgradeAnnotationsPass());
+
// NVVMReflectPass is added in addEarlyAsPossiblePasses, so hopefully running
// it here does nothing. But since we need it for correctness when lowering
// to NVPTX, run it here too, in case whoever built our pass pipeline didn't
diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
index 98bffd92a087b6..04e83576cbf958 100644
--- a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -311,11 +311,16 @@ std::optional<unsigned> getMaxNReg(const Function &F) {
}
bool isKernelFunction(const Function &F) {
+ if (F.getCallingConv() == CallingConv::PTX_Kernel)
+ return true;
+
+ if (F.hasFnAttribute("nvvm.kernel"))
+ return true;
+
if (const auto X = findOneNVVMAnnotation(&F, "kernel"))
return (*X == 1);
- // There is no NVVM metadata, check the calling convention
- return F.getCallingConv() == CallingConv::PTX_Kernel;
+ return false;
}
MaybeAlign getAlign(const Function &F, unsigned Index) {
diff --git a/llvm/lib/Target/NVPTX/NVVMUpgradeAnnotations.cpp b/llvm/lib/Target/NVPTX/NVVMUpgradeAnnotations.cpp
new file mode 100644
index 00000000000000..ca550434835a2c
--- /dev/null
+++ b/llvm/lib/Target/NVPTX/NVVMUpgradeAnnotations.cpp
@@ -0,0 +1,130 @@
+//===- NVVMUpgradeAnnotations.cpp - Upgrade NVVM Annotations --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass replaces deprecated metadata in nvvm.annotation with a more modern
+// IR representation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTX.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+#include <cstdint>
+
+#define DEBUG_TYPE "nvvm-upgrade-annotations"
+
+using namespace llvm;
+
+namespace llvm {
+void initializeNVVMUpgradeAnnotationsLegacyPassPass(PassRegistry &);
+} // namespace llvm
+
+namespace {
+
+class NVVMUpgradeAnnotationsLegacyPass : public ModulePass {
+public:
+ static char ID;
+ NVVMUpgradeAnnotationsLegacyPass() : ModulePass(ID) {
+ initializeNVVMUpgradeAnnotationsLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+ bool runOnModule(Module &M) override;
+};
+} // namespace
+
+char NVVMUpgradeAnnotationsLegacyPass::ID = 0;
+
+bool static autoUpgradeAnnotation(Function *F, StringRef K, const Metadata *V) {
+ if (K == "kernel") {
+ assert(mdconst::extract<ConstantInt>(V)->getZExtValue() == 1);
+ F->addFnAttr("nvvm.kernel");
+ return true;
+ }
+ if (K == "align") {
+ const uint64_t AlignBits = mdconst::extract<ConstantInt>(V)->getZExtValue();
+ const unsigned Idx = (AlignBits >> 16);
+ const Align StackAlign = Align(AlignBits & 0xFFFF);
+ // TODO: Skip adding the stackalign attribute for returns, for now.
+ if (!Idx)
+ return false;
+ F->addAttributeAtIndex(
+ Idx, Attribute::getWithStackAlignment(F->getContext(), StackAlign));
+ return true;
+ }
+
+ return false;
+}
+
+// Iterate over nvvm.annotations rewriting them as appropiate.
+void static upgradeNVAnnotations(Module &M) {
+ NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
+ if (!NamedMD)
+ return;
+
+ SmallVector<MDNode *, 8> NewNodes;
+ SmallSet<const MDNode *, 8> SeenNodes;
+ for (MDNode *MD : NamedMD->operands()) {
+ if (SeenNodes.contains(MD))
+ continue;
+ SeenNodes.insert(MD);
+
+ Function *F = mdconst::dyn_extract_or_null<Function>(MD->getOperand(0));
+ if (!F)
+ continue;
+
+ assert(MD && "Invalid MDNode for annotation");
+ assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
+
+ SmallVector<Metadata *, 8> NewOperands;
+ // start index = 1, to skip the global variable key
+ // increment = 2, to skip the value for each property-value pairs
+ for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
+ MDString *K = cast<MDString>(MD->getOperand(j));
+ const MDOperand &V = MD->getOperand(j + 1);
+ bool Upgraded = autoUpgradeAnnotation(F, K->getString(), V);
+ if (!Upgraded)
+ NewOperands.append({K, V});
+ }
+
+ if (!NewOperands.empty()) {
+ NewOperands.insert(NewOperands.begin(), MD->getOperand(0));
+ NewNodes.push_back(MDNode::get(M.getContext(), NewOperands));
+ }
+ }
+
+ NamedMD->clearOperands();
+ for (MDNode *N : NewNodes)
+ NamedMD->addOperand(N);
+}
+
+PreservedAnalyses NVVMUpgradeAnnotationsPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ upgradeNVAnnotations(M);
+ return PreservedAnalyses::all();
+}
+
+bool NVVMUpgradeAnnotationsLegacyPass::runOnModule(Module &M) {
+ upgradeNVAnnotations(M);
+ return false;
+}
+
+INITIALIZE_PASS(NVVMUpgradeAnnotationsLegacyPass, DEBUG_TYPE,
+ "NVVMUpgradeAnnotations", false, false)
+
+ModulePass *llvm::createNVVMUpgradeAnnotationsPass() {
+ return new NVVMUpgradeAnnotationsLegacyPass();
+}
diff --git a/llvm/test/CodeGen/NVPTX/upgrade-nvvm-annotations.ll b/llvm/test/CodeGen/NVPTX/upgrade-nvvm-annotations.ll
new file mode 100644
index 00000000000000..68dc2353858cb3
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/upgrade-nvvm-annotations.ll
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
+; RUN: opt < %s -mtriple=nvptx64-unknown-unknown -O0 -S | FileCheck %s
+
+define i32 @foo(i32 %a, i32 %b) {
+; CHECK-LABEL: define i32 @foo(
+; CHECK-SAME: i32 alignstack(8) [[A:%.*]], i32 alignstack(16) [[B:%.*]]) {
+; CHECK-NEXT: ret i32 0
+;
+ ret i32 0
+}
+
+define i32 @bar(i32 %a, i32 %b) {
+; CHECK-LABEL: define i32 @bar(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: ret i32 0
+;
+ ret i32 0
+}
+
+!nvvm.annotations = !{!0, !1, !2}
+
+!0 = !{ptr @foo, !"align", i32 u0x00000008, !"align", i32 u0x00010008, !"align", i32 u0x00020010}
+!1 = !{null, !"align", i32 u0x00000008, !"align", i32 u0x00010008, !"align", i32 u0x00020008}
+!2 = !{ptr @bar, !"kernel", i32 1}
+
+;.
+; CHECK: attributes #[[ATTR0]] = { "nvvm.kernel" }
+;.
+; CHECK: [[META0:![0-9]+]] = !{ptr @foo, !"align", i32 8}
+;.
>From b65b1d8f30b0aadb1152d07eedf39545e1c8fd65 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Mon, 9 Dec 2024 22:43:39 +0000
Subject: [PATCH 2/5] fixups
---
clang/lib/CodeGen/Targets/NVPTX.cpp | 36 ++++++++++++-------
clang/test/CodeGen/nvptx_attributes.c | 8 ++++-
clang/test/CodeGenCUDA/grid-constant.cu | 21 ++++++++---
clang/test/CodeGenCUDA/ptx-kernels.cu | 7 ++--
clang/test/CodeGenCUDA/usual-deallocators.cu | 4 +--
clang/test/CodeGenOpenCL/ptx-calls.cl | 4 +--
clang/test/CodeGenOpenCL/ptx-kernels.cl | 4 +--
clang/test/CodeGenOpenCL/reflect.cl | 8 ++++-
.../Target/NVPTX/NVVMUpgradeAnnotations.cpp | 12 ++++---
9 files changed, 70 insertions(+), 34 deletions(-)
diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp
index 0431d2cc4ddc39..2fddaf8efad10d 100644
--- a/clang/lib/CodeGen/Targets/NVPTX.cpp
+++ b/clang/lib/CodeGen/Targets/NVPTX.cpp
@@ -78,14 +78,12 @@ class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
// Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
// resulting MDNode to the nvvm.annotations MDNode.
- static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
- int Operand,
+ static void
+ addNVVMGridConstantMetadata(llvm::GlobalValue *GV,
const SmallVectorImpl<int> &GridConstantArgs);
static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
- int Operand) {
- addNVVMMetadata(GV, Name, Operand, SmallVector<int, 1>(0));
- }
+ int Operand);
private:
static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
@@ -259,7 +257,7 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
if (FD->hasAttr<OpenCLKernelAttr>()) {
// OpenCL __kernel functions get kernel metadata
// Create !{<func-ref>, metadata !"kernel", i32 1} node
- addNVVMMetadata(F, "kernel", 1);
+ F->addFnAttr("nvvm.kernel");
// And kernel functions are not subject to inlining
F->addFnAttr(llvm::Attribute::NoInline);
}
@@ -277,21 +275,20 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
// For some reason arg indices are 1-based in NVVM
GCI.push_back(IV.index() + 1);
// Create !{<func-ref>, metadata !"kernel", i32 1} node
- addNVVMMetadata(F, "kernel", 1, GCI);
+ addNVVMGridConstantMetadata(F, GCI);
+ F->addFnAttr("nvvm.kernel");
}
if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
M.handleCUDALaunchBoundsAttr(F, Attr);
}
// Attach kernel metadata directly if compiling for NVPTX.
- if (FD->hasAttr<NVPTXKernelAttr>()) {
- addNVVMMetadata(F, "kernel", 1);
- }
+ if (FD->hasAttr<NVPTXKernelAttr>())
+ F->addFnAttr("nvvm.kernel");
}
-void NVPTXTargetCodeGenInfo::addNVVMMetadata(
- llvm::GlobalValue *GV, StringRef Name, int Operand,
- const SmallVectorImpl<int> &GridConstantArgs) {
+void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
+ StringRef Name, int Operand) {
llvm::Module *M = GV->getParent();
llvm::LLVMContext &Ctx = M->getContext();
@@ -302,6 +299,19 @@ void NVPTXTargetCodeGenInfo::addNVVMMetadata(
llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
+ // Append metadata to nvvm.annotations
+ MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
+}
+
+void NVPTXTargetCodeGenInfo::addNVVMGridConstantMetadata(
+ llvm::GlobalValue *GV, const SmallVectorImpl<int> &GridConstantArgs) {
+ llvm::Module *M = GV->getParent();
+ llvm::LLVMContext &Ctx = M->getContext();
+
+ // Get "nvvm.annotations" metadata node
+ llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
+
+ SmallVector<llvm::Metadata *, 5> MDVals = {llvm::ConstantAsMetadata::get(GV)};
if (!GridConstantArgs.empty()) {
SmallVector<llvm::Metadata *, 10> GCM;
for (int I : GridConstantArgs)
diff --git a/clang/test/CodeGen/nvptx_attributes.c b/clang/test/CodeGen/nvptx_attributes.c
index 7dbd9f1321e280..2edca9cd28c815 100644
--- a/clang/test/CodeGen/nvptx_attributes.c
+++ b/clang/test/CodeGen/nvptx_attributes.c
@@ -10,8 +10,14 @@
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[RET_ADDR]], align 8
// CHECK-NEXT: store i32 1, ptr [[TMP0]], align 4
// CHECK-NEXT: ret void
+//
__attribute__((nvptx_kernel)) void foo(int *ret) {
*ret = 1;
}
-// CHECK: !0 = !{ptr @foo, !"kernel", i32 1}
+//.
+// CHECK: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "nvvm.kernel" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx32,+sm_61" }
+//.
+// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+// CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+//.
diff --git a/clang/test/CodeGenCUDA/grid-constant.cu b/clang/test/CodeGenCUDA/grid-constant.cu
index 8d4be9c9dc7e1e..34c94009dc4d75 100644
--- a/clang/test/CodeGenCUDA/grid-constant.cu
+++ b/clang/test/CodeGenCUDA/grid-constant.cu
@@ -5,6 +5,15 @@
struct S {};
+// CHECK-LABEL: define dso_local void @_Z6kernel1Sii(
+// CHECK-SAME: ptr noundef byval([[STRUCT_S:%.*]]) align 1 [[GC_ARG1:%.*]], i32 noundef [[ARG2:%.*]], i32 noundef [[GC_ARG3:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ARG2_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[GC_ARG3_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store i32 [[ARG2]], ptr [[ARG2_ADDR]], align 4
+// CHECK-NEXT: store i32 [[GC_ARG3]], ptr [[GC_ARG3_ADDR]], align 4
+// CHECK-NEXT: ret void
+//
__global__ void kernel(__grid_constant__ const S gc_arg1, int arg2, __grid_constant__ const int gc_arg3) {}
// dependent arguments get diagnosed after instantiation.
@@ -20,12 +29,16 @@ void foo() {
tkernel<const S><<<1,1>>>(1, {});
}
//.
+// CHECK: attributes #[[ATTR0]] = { convergent mustprogress noinline norecurse nounwind optnone "no-trapping-math"="true" "nvvm.kernel" "stack-protector-buffer-size"="8" "target-features"="+ptx32" "uniform-work-group-size"="true" }
//.
-// CHECK: [[META0:![0-9]+]] = !{ptr @_Z6kernel1Sii, !"kernel", i32 1, !"grid_constant", [[META1:![0-9]+]]}
+// CHECK: [[META0:![0-9]+]] = !{ptr @_Z6kernel1Sii, !"grid_constant", [[META1:![0-9]+]]}
// CHECK: [[META1]] = !{i32 1, i32 3}
-// CHECK: [[META2:![0-9]+]] = !{ptr @_Z13tkernel_constIK1SEvT_, !"kernel", i32 1, !"grid_constant", [[META3:![0-9]+]]}
+// CHECK: [[META2:![0-9]+]] = !{ptr @_Z13tkernel_constIK1SEvT_, !"grid_constant", [[META3:![0-9]+]]}
// CHECK: [[META3]] = !{i32 1}
-// CHECK: [[META4:![0-9]+]] = !{ptr @_Z13tkernel_constI1SEvT_, !"kernel", i32 1, !"grid_constant", [[META3]]}
-// CHECK: [[META5:![0-9]+]] = !{ptr @_Z7tkernelIK1SEviT_, !"kernel", i32 1, !"grid_constant", [[META6:![0-9]+]]}
+// CHECK: [[META4:![0-9]+]] = !{ptr @_Z13tkernel_constI1SEvT_, !"grid_constant", [[META3]]}
+// CHECK: [[META5:![0-9]+]] = !{ptr @_Z7tkernelIK1SEviT_, !"grid_constant", [[META6:![0-9]+]]}
// CHECK: [[META6]] = !{i32 2}
+// CHECK: [[META7:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+// CHECK: [[META8:![0-9]+]] = !{i32 4, !"nvvm-reflect-ftz", i32 0}
+// CHECK: [[META9:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
//.
diff --git a/clang/test/CodeGenCUDA/ptx-kernels.cu b/clang/test/CodeGenCUDA/ptx-kernels.cu
index b7172b77369296..dd64bd822c01e0 100644
--- a/clang/test/CodeGenCUDA/ptx-kernels.cu
+++ b/clang/test/CodeGenCUDA/ptx-kernels.cu
@@ -10,7 +10,7 @@
extern "C"
__device__ void device_function() {}
-// CHECK-LABEL: define{{.*}} void @global_function
+// CHECK-LABEL: define{{.*}} void @global_function{{.*}} #[[ATTR0:[0-9]+]]
extern "C"
__global__ void global_function() {
// CHECK: call void @device_function
@@ -23,7 +23,7 @@ template <typename T> __global__ void templated_kernel(T param) {}
namespace {
__global__ void anonymous_ns_kernel() {}
-// CHECK-DAG: define{{.*}} void @_ZN12_GLOBAL__N_119anonymous_ns_kernelEv(
+// CHECK-DAG: define{{.*}} void @_ZN12_GLOBAL__N_119anonymous_ns_kernelEv({{.*}} #[[ATTR0]]
}
void host_function() {
@@ -31,5 +31,4 @@ void host_function() {
anonymous_ns_kernel<<<0,0>>>();
}
-// CHECK: !{{[0-9]+}} = !{ptr @global_function, !"kernel", i32 1}
-// CHECK: !{{[0-9]+}} = !{ptr @_Z16templated_kernelIiEvT_, !"kernel", i32 1}
+// CHECK: attributes #[[ATTR0]] = {{{.*}} "nvvm.kernel" {{.*}}}
diff --git a/clang/test/CodeGenCUDA/usual-deallocators.cu b/clang/test/CodeGenCUDA/usual-deallocators.cu
index b85a706813fc2b..21616b19ae135a 100644
--- a/clang/test/CodeGenCUDA/usual-deallocators.cu
+++ b/clang/test/CodeGenCUDA/usual-deallocators.cu
@@ -109,7 +109,7 @@ __host__ __device__ void tests_hd(void *t) {
}
// Make sure that we've generated the kernel used by A::~A.
-// DEVICE-LABEL: define void @_Z1fIiEvT_
+// DEVICE: define void @_Z1fIiEvT_{{.*}} #[[ATTR0:[0-9]+]]
// Make sure we've picked deallocator for the correct side of compilation.
@@ -148,4 +148,4 @@ __host__ __device__ void tests_hd(void *t) {
// DEVICE: call void @dev_fn()
// HOST: call void @host_fn()
-// DEVICE: !0 = !{ptr @_Z1fIiEvT_, !"kernel", i32 1}
+// DEVICE: attributes #[[ATTR0]] = {{{.*}} "nvvm.kernel" {{.*}}}
diff --git a/clang/test/CodeGenOpenCL/ptx-calls.cl b/clang/test/CodeGenOpenCL/ptx-calls.cl
index 0081152ae40e01..c914db87572cee 100644
--- a/clang/test/CodeGenOpenCL/ptx-calls.cl
+++ b/clang/test/CodeGenOpenCL/ptx-calls.cl
@@ -7,7 +7,7 @@ void device_function() {
__kernel void kernel_function() {
device_function();
}
-// CHECK-LABEL: define{{.*}} spir_kernel void @kernel_function()
+// CHECK: define{{.*}} spir_kernel void @kernel_function() #[[ATTR0:[0-9]+]]
// CHECK: call void @device_function()
-// CHECK: !{{[0-9]+}} = !{ptr @kernel_function, !"kernel", i32 1}
+// CHECK: attributes #[[ATTR0]] = {{{.*}} "nvvm.kernel" {{.*}}}
diff --git a/clang/test/CodeGenOpenCL/ptx-kernels.cl b/clang/test/CodeGenOpenCL/ptx-kernels.cl
index 210e5682ac721c..93f2df6c49e82a 100644
--- a/clang/test/CodeGenOpenCL/ptx-kernels.cl
+++ b/clang/test/CodeGenOpenCL/ptx-kernels.cl
@@ -6,6 +6,6 @@ void device_function() {
__kernel void kernel_function() {
}
-// CHECK-LABEL: define{{.*}} spir_kernel void @kernel_function()
+// CHECK: define{{.*}} spir_kernel void @kernel_function() #[[ATTR0:[0-9]+]]
-// CHECK: !{{[0-9]+}} = !{ptr @kernel_function, !"kernel", i32 1}
+// CHECK: attributes #[[ATTR0]] = {{{.*}} "nvvm.kernel" {{.*}}}
diff --git a/clang/test/CodeGenOpenCL/reflect.cl b/clang/test/CodeGenOpenCL/reflect.cl
index 9ae4a5f027d358..88ff74022e6c5f 100644
--- a/clang/test/CodeGenOpenCL/reflect.cl
+++ b/clang/test/CodeGenOpenCL/reflect.cl
@@ -13,7 +13,7 @@ bool device_function() {
}
// CHECK-LABEL: define dso_local spir_kernel void @kernel_function(
-// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
+// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6:![0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 4
// CHECK-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR]], align 4
@@ -26,3 +26,9 @@ bool device_function() {
__kernel void kernel_function(__global int *i) {
*i = device_function();
}
+//.
+// CHECK: [[META3]] = !{i32 1}
+// CHECK: [[META4]] = !{!"none"}
+// CHECK: [[META5]] = !{!"int*"}
+// CHECK: [[META6]] = !{!""}
+//.
diff --git a/llvm/lib/Target/NVPTX/NVVMUpgradeAnnotations.cpp b/llvm/lib/Target/NVPTX/NVVMUpgradeAnnotations.cpp
index ca550434835a2c..27415be5034f96 100644
--- a/llvm/lib/Target/NVPTX/NVVMUpgradeAnnotations.cpp
+++ b/llvm/lib/Target/NVPTX/NVVMUpgradeAnnotations.cpp
@@ -19,6 +19,7 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
@@ -48,10 +49,11 @@ class NVVMUpgradeAnnotationsLegacyPass : public ModulePass {
char NVVMUpgradeAnnotationsLegacyPass::ID = 0;
-bool static autoUpgradeAnnotation(Function *F, StringRef K, const Metadata *V) {
+bool static autoUpgradeAnnotation(GlobalValue *GV, StringRef K,
+ const Metadata *V) {
if (K == "kernel") {
assert(mdconst::extract<ConstantInt>(V)->getZExtValue() == 1);
- F->addFnAttr("nvvm.kernel");
+ cast<Function>(GV)->addFnAttr("nvvm.kernel");
return true;
}
if (K == "align") {
@@ -61,8 +63,8 @@ bool static autoUpgradeAnnotation(Function *F, StringRef K, const Metadata *V) {
// TODO: Skip adding the stackalign attribute for returns, for now.
if (!Idx)
return false;
- F->addAttributeAtIndex(
- Idx, Attribute::getWithStackAlignment(F->getContext(), StackAlign));
+ cast<Function>(GV)->addAttributeAtIndex(
+ Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign));
return true;
}
@@ -82,7 +84,7 @@ void static upgradeNVAnnotations(Module &M) {
continue;
SeenNodes.insert(MD);
- Function *F = mdconst::dyn_extract_or_null<Function>(MD->getOperand(0));
+ auto *F = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0));
if (!F)
continue;
>From eb96b4dcee7db603e43707a00a188937171e955b Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Mon, 9 Dec 2024 23:45:54 +0000
Subject: [PATCH 3/5] move to auto-upgrade
---
llvm/include/llvm/IR/AutoUpgrade.h | 4 +
llvm/lib/AsmParser/LLParser.cpp | 1 +
llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 2 +
llvm/lib/IR/AutoUpgrade.cpp | 63 +++++++++
llvm/lib/Linker/IRMover.cpp | 1 +
llvm/lib/Target/NVPTX/CMakeLists.txt | 1 -
llvm/lib/Target/NVPTX/NVPTX.h | 5 -
llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 4 -
.../Target/NVPTX/NVVMUpgradeAnnotations.cpp | 132 ------------------
9 files changed, 71 insertions(+), 142 deletions(-)
delete mode 100644 llvm/lib/Target/NVPTX/NVVMUpgradeAnnotations.cpp
diff --git a/llvm/include/llvm/IR/AutoUpgrade.h b/llvm/include/llvm/IR/AutoUpgrade.h
index 97c3e4d7589d7b..8c093568a1e031 100644
--- a/llvm/include/llvm/IR/AutoUpgrade.h
+++ b/llvm/include/llvm/IR/AutoUpgrade.h
@@ -61,6 +61,10 @@ namespace llvm {
/// module is modified.
bool UpgradeModuleFlags(Module &M);
+ /// Convert legacy nvvm.annotations metadata to appropriate function
+ /// attributes.
+ void UpgradeNVVMAnnotations(Module &M);
+
/// Convert calls to ARC runtime functions to intrinsic calls and upgrade the
/// old retain release marker to new module flag format.
void UpgradeARCRuntime(Module &M);
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 34311499367b41..4e869cfe312e47 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -448,6 +448,7 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) {
llvm::UpgradeDebugInfo(*M);
UpgradeModuleFlags(*M);
+ UpgradeNVVMAnnotations(*M);
UpgradeSectionAttributes(*M);
if (PreserveInputDbgFormat != cl::boolOrDefault::BOU_TRUE)
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 85c6fadeda6cc3..fff196aca9ffc6 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -7142,6 +7142,8 @@ Error BitcodeReader::materializeModule() {
UpgradeModuleFlags(*TheModule);
+ UpgradeNVVMAnnotations(*TheModule);
+
UpgradeARCRuntime(*TheModule);
return Error::success();
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index e73538da282e99..d774606bac9448 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -5022,6 +5022,69 @@ bool llvm::UpgradeDebugInfo(Module &M) {
return Modified;
}
+bool static upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K,
+ const Metadata *V) {
+ if (K == "kernel") {
+ assert(mdconst::extract<ConstantInt>(V)->getZExtValue() == 1);
+ cast<Function>(GV)->addFnAttr("nvvm.kernel");
+ return true;
+ }
+ if (K == "align") {
+ const uint64_t AlignBits = mdconst::extract<ConstantInt>(V)->getZExtValue();
+ const unsigned Idx = (AlignBits >> 16);
+ const Align StackAlign = Align(AlignBits & 0xFFFF);
+ // TODO: Skip adding the stackalign attribute for returns, for now.
+ if (!Idx)
+ return false;
+ cast<Function>(GV)->addAttributeAtIndex(
+ Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign));
+ return true;
+ }
+
+ return false;
+}
+
+void llvm::UpgradeNVVMAnnotations(Module &M) {
+ NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
+ if (!NamedMD)
+ return;
+
+ SmallVector<MDNode *, 8> NewNodes;
+ SmallSet<const MDNode *, 8> SeenNodes;
+ for (MDNode *MD : NamedMD->operands()) {
+ if (SeenNodes.contains(MD))
+ continue;
+ SeenNodes.insert(MD);
+
+ auto *F = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0));
+ if (!F)
+ continue;
+
+ assert(MD && "Invalid MDNode for annotation");
+ assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
+
+ SmallVector<Metadata *, 8> NewOperands;
+ // start index = 1, to skip the global variable key
+ // increment = 2, to skip the value for each property-value pairs
+ for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
+ MDString *K = cast<MDString>(MD->getOperand(j));
+ const MDOperand &V = MD->getOperand(j + 1);
+ bool Upgraded = upgradeSingleNVVMAnnotation(F, K->getString(), V);
+ if (!Upgraded)
+ NewOperands.append({K, V});
+ }
+
+ if (!NewOperands.empty()) {
+ NewOperands.insert(NewOperands.begin(), MD->getOperand(0));
+ NewNodes.push_back(MDNode::get(M.getContext(), NewOperands));
+ }
+ }
+
+ NamedMD->clearOperands();
+ for (MDNode *N : NewNodes)
+ NamedMD->addOperand(N);
+}
+
/// This checks for objc retain release marker which should be upgraded. It
/// returns true if module is modified.
static bool upgradeRetainReleaseMarker(Module &M) {
diff --git a/llvm/lib/Linker/IRMover.cpp b/llvm/lib/Linker/IRMover.cpp
index a0c3f2c5b0baf6..5681c4257a90ad 100644
--- a/llvm/lib/Linker/IRMover.cpp
+++ b/llvm/lib/Linker/IRMover.cpp
@@ -1247,6 +1247,7 @@ Error IRLinker::linkModuleFlagsMetadata() {
// Check for module flag for updates before do anything.
UpgradeModuleFlags(*SrcM);
+ UpgradeNVVMAnnotations(*SrcM);
// If the destination module doesn't have module flags yet, then just copy
// over the source module's flags.
diff --git a/llvm/lib/Target/NVPTX/CMakeLists.txt b/llvm/lib/Target/NVPTX/CMakeLists.txt
index bb2e4ad48b51d8..693365161330f5 100644
--- a/llvm/lib/Target/NVPTX/CMakeLists.txt
+++ b/llvm/lib/Target/NVPTX/CMakeLists.txt
@@ -39,7 +39,6 @@ set(NVPTXCodeGen_sources
NVVMReflect.cpp
NVPTXProxyRegErasure.cpp
NVPTXCtorDtorLowering.cpp
- NVVMUpgradeAnnotations.cpp
)
add_llvm_target(NVPTXCodeGen
diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
index 53418148be3615..ca915cd3f3732f 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -52,7 +52,6 @@ FunctionPass *createNVPTXLowerUnreachablePass(bool TrapUnreachable,
bool NoTrapAfterNoreturn);
MachineFunctionPass *createNVPTXPeephole();
MachineFunctionPass *createNVPTXProxyRegErasurePass();
-ModulePass *createNVVMUpgradeAnnotationsPass();
struct NVVMIntrRangePass : PassInfoMixin<NVVMIntrRangePass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
@@ -75,10 +74,6 @@ struct NVPTXCopyByValArgsPass : PassInfoMixin<NVPTXCopyByValArgsPass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
-struct NVVMUpgradeAnnotationsPass : PassInfoMixin<NVVMUpgradeAnnotationsPass> {
- PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
-};
-
namespace NVPTX {
enum DrvInterface {
NVCL,
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index b4fd36625adc9c..a5c5e9420ee737 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -254,8 +254,6 @@ void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PB.registerPipelineStartEPCallback(
[this](ModulePassManager &PM, OptimizationLevel Level) {
- PM.addPass(NVVMUpgradeAnnotationsPass());
-
FunctionPassManager FPM;
FPM.addPass(NVVMReflectPass(Subtarget.getSmVersion()));
// Note: NVVMIntrRangePass was causing numerical discrepancies at one
@@ -351,8 +349,6 @@ void NVPTXPassConfig::addIRPasses() {
AAR.addAAResult(WrapperPass->getResult());
}));
- addPass(createNVVMUpgradeAnnotationsPass());
-
// NVVMReflectPass is added in addEarlyAsPossiblePasses, so hopefully running
// it here does nothing. But since we need it for correctness when lowering
// to NVPTX, run it here too, in case whoever built our pass pipeline didn't
diff --git a/llvm/lib/Target/NVPTX/NVVMUpgradeAnnotations.cpp b/llvm/lib/Target/NVPTX/NVVMUpgradeAnnotations.cpp
deleted file mode 100644
index 27415be5034f96..00000000000000
--- a/llvm/lib/Target/NVPTX/NVVMUpgradeAnnotations.cpp
+++ /dev/null
@@ -1,132 +0,0 @@
-//===- NVVMUpgradeAnnotations.cpp - Upgrade NVVM Annotations --------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass replaces deprecated metadata in nvvm.annotation with a more modern
-// IR representation.
-//
-//===----------------------------------------------------------------------===//
-
-#include "NVPTX.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/PassManager.h"
-#include "llvm/Pass.h"
-#include <cstdint>
-
-#define DEBUG_TYPE "nvvm-upgrade-annotations"
-
-using namespace llvm;
-
-namespace llvm {
-void initializeNVVMUpgradeAnnotationsLegacyPassPass(PassRegistry &);
-} // namespace llvm
-
-namespace {
-
-class NVVMUpgradeAnnotationsLegacyPass : public ModulePass {
-public:
- static char ID;
- NVVMUpgradeAnnotationsLegacyPass() : ModulePass(ID) {
- initializeNVVMUpgradeAnnotationsLegacyPassPass(
- *PassRegistry::getPassRegistry());
- }
- bool runOnModule(Module &M) override;
-};
-} // namespace
-
-char NVVMUpgradeAnnotationsLegacyPass::ID = 0;
-
-bool static autoUpgradeAnnotation(GlobalValue *GV, StringRef K,
- const Metadata *V) {
- if (K == "kernel") {
- assert(mdconst::extract<ConstantInt>(V)->getZExtValue() == 1);
- cast<Function>(GV)->addFnAttr("nvvm.kernel");
- return true;
- }
- if (K == "align") {
- const uint64_t AlignBits = mdconst::extract<ConstantInt>(V)->getZExtValue();
- const unsigned Idx = (AlignBits >> 16);
- const Align StackAlign = Align(AlignBits & 0xFFFF);
- // TODO: Skip adding the stackalign attribute for returns, for now.
- if (!Idx)
- return false;
- cast<Function>(GV)->addAttributeAtIndex(
- Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign));
- return true;
- }
-
- return false;
-}
-
-// Iterate over nvvm.annotations rewriting them as appropiate.
-void static upgradeNVAnnotations(Module &M) {
- NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
- if (!NamedMD)
- return;
-
- SmallVector<MDNode *, 8> NewNodes;
- SmallSet<const MDNode *, 8> SeenNodes;
- for (MDNode *MD : NamedMD->operands()) {
- if (SeenNodes.contains(MD))
- continue;
- SeenNodes.insert(MD);
-
- auto *F = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0));
- if (!F)
- continue;
-
- assert(MD && "Invalid MDNode for annotation");
- assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
-
- SmallVector<Metadata *, 8> NewOperands;
- // start index = 1, to skip the global variable key
- // increment = 2, to skip the value for each property-value pairs
- for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
- MDString *K = cast<MDString>(MD->getOperand(j));
- const MDOperand &V = MD->getOperand(j + 1);
- bool Upgraded = autoUpgradeAnnotation(F, K->getString(), V);
- if (!Upgraded)
- NewOperands.append({K, V});
- }
-
- if (!NewOperands.empty()) {
- NewOperands.insert(NewOperands.begin(), MD->getOperand(0));
- NewNodes.push_back(MDNode::get(M.getContext(), NewOperands));
- }
- }
-
- NamedMD->clearOperands();
- for (MDNode *N : NewNodes)
- NamedMD->addOperand(N);
-}
-
-PreservedAnalyses NVVMUpgradeAnnotationsPass::run(Module &M,
- ModuleAnalysisManager &AM) {
- upgradeNVAnnotations(M);
- return PreservedAnalyses::all();
-}
-
-bool NVVMUpgradeAnnotationsLegacyPass::runOnModule(Module &M) {
- upgradeNVAnnotations(M);
- return false;
-}
-
-INITIALIZE_PASS(NVVMUpgradeAnnotationsLegacyPass, DEBUG_TYPE,
- "NVVMUpgradeAnnotations", false, false)
-
-ModulePass *llvm::createNVVMUpgradeAnnotationsPass() {
- return new NVVMUpgradeAnnotationsLegacyPass();
-}
>From 7ff7a58f960b7f5f94e20885d620120e3339ab2a Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Tue, 10 Dec 2024 17:44:10 +0000
Subject: [PATCH 4/5] update test for auto-upgrade
---
clang/test/CodeGenCUDA/ptx-kernels.cu | 2 +-
clang/test/CodeGenOpenCL/ptx-kernels.cl | 2 +-
llvm/lib/Target/NVPTX/NVPTXUtilities.cpp | 22 +-
llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 36 +-
.../CodeGen/NVPTX/lower-args-gridconstant.ll | 36 +-
.../Transforms/OpenMP/always_inline_device.ll | 15 +-
.../test/Transforms/OpenMP/barrier_removal.ll | 86 +-
llvm/test/Transforms/OpenMP/bug66687.ll | 3 +-
.../OpenMP/custom_state_machines.ll | 1020 ++++++-----
.../OpenMP/custom_state_machines_pre_lto.ll | 1530 ++++++++---------
.../get_hardware_num_threads_in_block_fold.ll | 5 +-
...dware_num_threads_in_block_fold_optnone.ll | 5 +-
.../OpenMP/is_spmd_exec_mode_fold.ll | 6 +-
.../Transforms/OpenMP/nested_parallelism.ll | 8 +-
.../Transforms/OpenMP/parallel_level_fold.ll | 5 +-
.../Transforms/OpenMP/remove_globalization.ll | 26 +-
.../OpenMP/replace_globalization.ll | 13 +-
llvm/test/Transforms/OpenMP/spmdization.ll | 516 +++---
.../Transforms/OpenMP/spmdization_assumes.ll | 25 +-
.../Transforms/OpenMP/spmdization_guarding.ll | 106 +-
...mdization_guarding_two_reaching_kernels.ll | 32 +-
.../Transforms/OpenMP/spmdization_indirect.ll | 150 +-
...zation_no_guarding_two_reaching_kernels.ll | 32 +-
.../OpenMP/value-simplify-openmp-opt.ll | 46 +-
24 files changed, 1744 insertions(+), 1983 deletions(-)
diff --git a/clang/test/CodeGenCUDA/ptx-kernels.cu b/clang/test/CodeGenCUDA/ptx-kernels.cu
index dd64bd822c01e0..0c838662f82bee 100644
--- a/clang/test/CodeGenCUDA/ptx-kernels.cu
+++ b/clang/test/CodeGenCUDA/ptx-kernels.cu
@@ -10,7 +10,7 @@
extern "C"
__device__ void device_function() {}
-// CHECK-LABEL: define{{.*}} void @global_function{{.*}} #[[ATTR0:[0-9]+]]
+// CHECK: define{{.*}} void @global_function{{.*}} #[[ATTR0:[0-9]+]]
extern "C"
__global__ void global_function() {
// CHECK: call void @device_function
diff --git a/clang/test/CodeGenOpenCL/ptx-kernels.cl b/clang/test/CodeGenOpenCL/ptx-kernels.cl
index 93f2df6c49e82a..8b8fc0bab10c88 100644
--- a/clang/test/CodeGenOpenCL/ptx-kernels.cl
+++ b/clang/test/CodeGenOpenCL/ptx-kernels.cl
@@ -6,6 +6,6 @@ void device_function() {
__kernel void kernel_function() {
}
-// CHECK: define{{.*}} spir_kernel void @kernel_function() #[[ATTR0:[0-9]+]]
+// CHECK: define{{.*}} spir_kernel void @kernel_function() {{.*}} #[[ATTR0:[0-9]+]]
// CHECK: attributes #[[ATTR0]] = {{{.*}} "nvvm.kernel" {{.*}}}
diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
index 04e83576cbf958..4364074dc01349 100644
--- a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -317,9 +317,6 @@ bool isKernelFunction(const Function &F) {
if (F.hasFnAttribute("nvvm.kernel"))
return true;
- if (const auto X = findOneNVVMAnnotation(&F, "kernel"))
- return (*X == 1);
-
return false;
}
@@ -329,14 +326,17 @@ MaybeAlign getAlign(const Function &F, unsigned Index) {
F.getAttributes().getAttributes(Index).getStackAlignment())
return StackAlign;
- // If that is missing, check the legacy nvvm metadata
- std::vector<unsigned> Vs;
- bool retval = findAllNVVMAnnotation(&F, "align", Vs);
- if (!retval)
- return std::nullopt;
- for (unsigned V : Vs)
- if ((V >> 16) == Index)
- return Align(V & 0xFFFF);
+ // check the legacy nvvm metadata only for the return value since llvm does
+ // not support stackalign attribute for this.
+ if (Index == 0) {
+ std::vector<unsigned> Vs;
+ bool retval = findAllNVVMAnnotation(&F, "align", Vs);
+ if (!retval)
+ return std::nullopt;
+ for (unsigned V : Vs)
+ if ((V >> 16) == Index)
+ return Align(V & 0xFFFF);
+ }
return std::nullopt;
}
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index b40ab357670b86..c85d872f0c7cd5 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -5911,31 +5911,21 @@ bool llvm::omp::isOpenMPKernel(Function &Fn) {
KernelSet llvm::omp::getDeviceKernels(Module &M) {
// TODO: Create a more cross-platform way of determining device kernels.
- NamedMDNode *MD = M.getNamedMetadata("nvvm.annotations");
KernelSet Kernels;
- if (!MD)
- return Kernels;
-
- for (auto *Op : MD->operands()) {
- if (Op->getNumOperands() < 2)
- continue;
- MDString *KindID = dyn_cast<MDString>(Op->getOperand(1));
- if (!KindID || KindID->getString() != "kernel")
- continue;
-
- Function *KernelFn =
- mdconst::dyn_extract_or_null<Function>(Op->getOperand(0));
- if (!KernelFn)
- continue;
-
- // We are only interested in OpenMP target regions. Others, such as kernels
- // generated by CUDA but linked together, are not interesting to this pass.
- if (isOpenMPKernel(*KernelFn)) {
- ++NumOpenMPTargetRegionKernels;
- Kernels.insert(KernelFn);
- } else
- ++NumNonOpenMPTargetRegionKernels;
+ for (auto &F : M) {
+ // TODO: unify this check with isKernelFunction in NVPTXUtilities.
+ if (F.hasFnAttribute("nvvm.kernel")) {
+
+ // We are only interested in OpenMP target regions. Others, such as
+ // kernels generated by CUDA but linked together, are not interesting to
+ // this pass.
+ if (isOpenMPKernel(F)) {
+ ++NumOpenMPTargetRegionKernels;
+ Kernels.insert(&F);
+ } else
+ ++NumNonOpenMPTargetRegionKernels;
+ }
}
return Kernels;
diff --git a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
index 9cfe9192772b89..6fdb8a63a611e3 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
@@ -83,7 +83,7 @@ define void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %input2, ptr %ou
; PTX-NEXT: st.global.u32 [%rd2], %r3;
; PTX-NEXT: ret;
; OPT-LABEL: define void @grid_const_int(
-; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], i32 [[INPUT2:%.*]], ptr [[OUT:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], i32 [[INPUT2:%.*]], ptr [[OUT:%.*]], i32 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
; OPT-NEXT: [[OUT2:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
; OPT-NEXT: [[OUT3:%.*]] = addrspacecast ptr addrspace(1) [[OUT2]] to ptr
; OPT-NEXT: [[INPUT11:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
@@ -91,6 +91,7 @@ define void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %input2, ptr %ou
; OPT-NEXT: [[ADD:%.*]] = add i32 [[TMP]], [[INPUT2]]
; OPT-NEXT: store i32 [[ADD]], ptr [[OUT3]], align 4
; OPT-NEXT: ret void
+;
%tmp = load i32, ptr %input1, align 4
%add = add i32 %tmp, %input2
store i32 %add, ptr %out
@@ -114,7 +115,7 @@ define void @grid_const_struct(ptr byval(%struct.s) align 4 %input, ptr %out){
; PTX-NEXT: st.global.u32 [%rd2], %r3;
; PTX-NEXT: ret;
; OPT-LABEL: define void @grid_const_struct(
-; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[OUT:%.*]]) #[[ATTR0]] {
+; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[OUT:%.*]]) #[[ATTR1]] {
; OPT-NEXT: [[OUT4:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
; OPT-NEXT: [[OUT5:%.*]] = addrspacecast ptr addrspace(1) [[OUT4]] to ptr
; OPT-NEXT: [[INPUT1:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
@@ -125,6 +126,7 @@ define void @grid_const_struct(ptr byval(%struct.s) align 4 %input, ptr %out){
; OPT-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[TMP2]]
; OPT-NEXT: store i32 [[ADD]], ptr [[OUT5]], align 4
; OPT-NEXT: ret void
+;
%gep1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0
%gep2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1
%int1 = load i32, ptr %gep1
@@ -160,11 +162,12 @@ define void @grid_const_escape(ptr byval(%struct.s) align 4 %input) {
; PTX-NEXT: } // callseq 0
; PTX-NEXT: ret;
; OPT-LABEL: define void @grid_const_escape(
-; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]]) #[[ATTR0]] {
+; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]]) #[[ATTR1]] {
; OPT-NEXT: [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
; OPT-NEXT: [[CALL:%.*]] = call i32 @escape(ptr [[INPUT_PARAM_GEN]])
; OPT-NEXT: ret void
+;
%call = call i32 @escape(ptr %input)
ret void
}
@@ -213,7 +216,7 @@ define void @multiple_grid_const_escape(ptr byval(%struct.s) align 4 %input, i32
; PTX-NEXT: } // callseq 1
; PTX-NEXT: ret;
; OPT-LABEL: define void @multiple_grid_const_escape(
-; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], i32 [[A:%.*]], ptr byval(i32) align 4 [[B:%.*]]) #[[ATTR0]] {
+; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], i32 [[A:%.*]], ptr byval(i32) align 4 [[B:%.*]]) #[[ATTR1]] {
; OPT-NEXT: [[B_PARAM:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(101)
; OPT-NEXT: [[B_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[B_PARAM]])
; OPT-NEXT: [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
@@ -222,6 +225,7 @@ define void @multiple_grid_const_escape(ptr byval(%struct.s) align 4 %input, i32
; OPT-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
; OPT-NEXT: [[CALL:%.*]] = call i32 @escape3(ptr [[INPUT_PARAM_GEN]], ptr [[A_ADDR]], ptr [[B_PARAM_GEN]])
; OPT-NEXT: ret void
+;
%a.addr = alloca i32, align 4
store i32 %a, ptr %a.addr, align 4
%call = call i32 @escape3(ptr %input, ptr %a.addr, ptr %b)
@@ -242,13 +246,14 @@ define void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %input, ptr %
; PTX-NEXT: st.global.u64 [%rd3], %rd5;
; PTX-NEXT: ret;
; OPT-LABEL: define void @grid_const_memory_escape(
-; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[ADDR:%.*]]) #[[ATTR0]] {
+; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[ADDR:%.*]]) #[[ATTR1]] {
; OPT-NEXT: [[ADDR4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
; OPT-NEXT: [[ADDR5:%.*]] = addrspacecast ptr addrspace(1) [[ADDR4]] to ptr
; OPT-NEXT: [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
; OPT-NEXT: store ptr [[INPUT1]], ptr [[ADDR5]], align 8
; OPT-NEXT: ret void
+;
store ptr %input, ptr %addr, align 8
ret void
}
@@ -272,7 +277,7 @@ define void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4 %input, pt
; PTX-NEXT: ret;
; PTX-NOT .local
; OPT-LABEL: define void @grid_const_inlineasm_escape(
-; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[RESULT:%.*]]) #[[ATTR0]] {
+; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[RESULT:%.*]]) #[[ATTR1]] {
; OPT-NEXT: [[RESULT4:%.*]] = addrspacecast ptr [[RESULT]] to ptr addrspace(1)
; OPT-NEXT: [[RESULT5:%.*]] = addrspacecast ptr addrspace(1) [[RESULT4]] to ptr
; OPT-NEXT: [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
@@ -282,6 +287,7 @@ define void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4 %input, pt
; OPT-NEXT: [[TMP2:%.*]] = call i64 asm "add.s64 $0, $1, $2
; OPT-NEXT: store i64 [[TMP2]], ptr [[RESULT5]], align 8
; OPT-NEXT: ret void
+;
%tmpptr1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0
%tmpptr2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1
%1 = call i64 asm "add.s64 $0, $1, $2;", "=l,l,l"(ptr %tmpptr1, ptr %tmpptr2) #1
@@ -320,7 +326,7 @@ define void @grid_const_partial_escape(ptr byval(i32) %input, ptr %output) {
; PTX-NEXT: } // callseq 2
; PTX-NEXT: ret;
; OPT-LABEL: define void @grid_const_partial_escape(
-; OPT-SAME: ptr byval(i32) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) #[[ATTR0]] {
+; OPT-SAME: ptr byval(i32) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) #[[ATTR1]] {
; OPT-NEXT: [[OUTPUT4:%.*]] = addrspacecast ptr [[OUTPUT]] to ptr addrspace(1)
; OPT-NEXT: [[OUTPUT5:%.*]] = addrspacecast ptr addrspace(1) [[OUTPUT4]] to ptr
; OPT-NEXT: [[INPUT1:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
@@ -330,6 +336,7 @@ define void @grid_const_partial_escape(ptr byval(i32) %input, ptr %output) {
; OPT-NEXT: store i32 [[TWICE]], ptr [[OUTPUT5]], align 4
; OPT-NEXT: [[CALL:%.*]] = call i32 @escape(ptr [[INPUT1_GEN]])
; OPT-NEXT: ret void
+;
%val = load i32, ptr %input
%twice = add i32 %val, %val
store i32 %twice, ptr %output
@@ -370,7 +377,7 @@ define i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input, ptr %outpu
; PTX-NEXT: st.param.b32 [func_retval0], %r3;
; PTX-NEXT: ret;
; OPT-LABEL: define i32 @grid_const_partial_escapemem(
-; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) #[[ATTR0]] {
+; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) #[[ATTR1]] {
; OPT-NEXT: [[OUTPUT4:%.*]] = addrspacecast ptr [[OUTPUT]] to ptr addrspace(1)
; OPT-NEXT: [[OUTPUT5:%.*]] = addrspacecast ptr addrspace(1) [[OUTPUT4]] to ptr
; OPT-NEXT: [[INPUT2:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
@@ -383,6 +390,7 @@ define i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input, ptr %outpu
; OPT-NEXT: [[ADD:%.*]] = add i32 [[VAL1]], [[VAL2]]
; OPT-NEXT: [[CALL2:%.*]] = call i32 @escape(ptr [[PTR1]])
; OPT-NEXT: ret i32 [[ADD]]
+;
%ptr1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0
%val1 = load i32, ptr %ptr1
%ptr2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1
@@ -416,7 +424,7 @@ define void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr %inout) {
; PTX-NEXT: st.global.u32 [%rd1], %r2;
; PTX-NEXT: ret;
; OPT-LABEL: define void @grid_const_phi(
-; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
+; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr [[INOUT:%.*]]) #[[ATTR1]] {
; OPT-NEXT: [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1)
; OPT-NEXT: [[INOUT2:%.*]] = addrspacecast ptr addrspace(1) [[INOUT1]] to ptr
; OPT-NEXT: [[INPUT1_PARAM:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
@@ -435,6 +443,7 @@ define void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr %inout) {
; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
; OPT-NEXT: ret void
+;
%val = load i32, ptr %inout
%less = icmp slt i32 %val, 0
@@ -479,7 +488,7 @@ define void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1, ptr byval(
; PTX-NEXT: st.global.u32 [%rd1], %r2;
; PTX-NEXT: ret;
; OPT-LABEL: define void @grid_const_phi_ngc(
-; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
+; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR1]] {
; OPT-NEXT: [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1)
; OPT-NEXT: [[INOUT2:%.*]] = addrspacecast ptr addrspace(1) [[INOUT1]] to ptr
; OPT-NEXT: [[INPUT2_PARAM:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
@@ -500,6 +509,7 @@ define void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1, ptr byval(
; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
; OPT-NEXT: ret void
+;
%val = load i32, ptr %inout
%less = icmp slt i32 %val, 0
br i1 %less, label %first, label %second
@@ -540,7 +550,7 @@ define void @grid_const_select(ptr byval(i32) align 4 %input1, ptr byval(i32) %i
; PTX-NEXT: st.global.u32 [%rd3], %r2;
; PTX-NEXT: ret;
; OPT-LABEL: define void @grid_const_select(
-; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
+; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR1]] {
; OPT-NEXT: [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1)
; OPT-NEXT: [[INOUT2:%.*]] = addrspacecast ptr addrspace(1) [[INOUT1]] to ptr
; OPT-NEXT: [[INPUT2_PARAM:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
@@ -553,6 +563,7 @@ define void @grid_const_select(ptr byval(i32) align 4 %input1, ptr byval(i32) %i
; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
; OPT-NEXT: ret void
+;
%val = load i32, ptr %inout
%less = icmp slt i32 %val, 0
%ptrnew = select i1 %less, ptr %input1, ptr %input2
@@ -577,13 +588,14 @@ define i32 @grid_const_ptrtoint(ptr byval(i32) %input) {
; PTX-NEXT: st.param.b32 [func_retval0], %r3;
; PTX-NEXT: ret;
; OPT-LABEL: define i32 @grid_const_ptrtoint(
-; OPT-SAME: ptr byval(i32) align 4 [[INPUT:%.*]]) #[[ATTR0]] {
+; OPT-SAME: ptr byval(i32) align 4 [[INPUT:%.*]]) #[[ATTR1]] {
; OPT-NEXT: [[INPUT2:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT3:%.*]] = load i32, ptr addrspace(101) [[INPUT2]], align 4
; OPT-NEXT: [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT2]])
; OPT-NEXT: [[PTRVAL:%.*]] = ptrtoint ptr [[INPUT1]] to i32
; OPT-NEXT: [[KEEPALIVE:%.*]] = add i32 [[INPUT3]], [[PTRVAL]]
; OPT-NEXT: ret i32 [[KEEPALIVE]]
+;
%val = load i32, ptr %input
%ptrval = ptrtoint ptr %input to i32
%keepalive = add i32 %val, %ptrval
diff --git a/llvm/test/Transforms/OpenMP/always_inline_device.ll b/llvm/test/Transforms/OpenMP/always_inline_device.ll
index 6028ff5278037b..a60402df417520 100644
--- a/llvm/test/Transforms/OpenMP/always_inline_device.ll
+++ b/llvm/test/Transforms/OpenMP/always_inline_device.ll
@@ -92,16 +92,15 @@ attributes #2 = { convergent }
!6 = !{i32 7, !"frame-pointer", i32 2}
!7 = !{!"clang version 14.0.0"}
;.
-; CHECK: attributes #[[ATTR0:[0-9]+]] = { norecurse nounwind "frame-pointer"="all" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx32,+sm_70" }
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { norecurse nounwind "frame-pointer"="all" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "nvvm.kernel" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx32,+sm_70" }
; CHECK: attributes #[[ATTR1:[0-9]+]] = { alwaysinline convergent nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx32,+sm_70" }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nounwind }
;.
; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 -1064087614, !"foo", i32 4, i32 0}
-; CHECK: [[META1:![0-9]+]] = !{ptr @__omp_offloading_fd02_c0934fc2_foo_l4, !"kernel", i32 1}
-; CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK: [[META5:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; CHECK: [[META6:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; CHECK: [[META7:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+; CHECK: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; CHECK: [[META2:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; CHECK: [[META4:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
+; CHECK: [[META5:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
+; CHECK: [[META6:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
;.
diff --git a/llvm/test/Transforms/OpenMP/barrier_removal.ll b/llvm/test/Transforms/OpenMP/barrier_removal.ll
index 47a5d5104aa8bd..8518e9d137b547 100644
--- a/llvm/test/Transforms/OpenMP/barrier_removal.ll
+++ b/llvm/test/Transforms/OpenMP/barrier_removal.ll
@@ -99,7 +99,7 @@ define void @pos_empty_7a() "kernel" {
define void @pos_empty_7b() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@pos_empty_7b
; CHECK-SAME: () #[[ATTR4]] {
-; CHECK-NEXT: call void @unknown() #[[ATTR5:[0-9]+]]
+; CHECK-NEXT: call void @unknown() #[[ATTR6:[0-9]+]]
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: ret void
@@ -247,7 +247,7 @@ m:
}
define void @neg_empty_1() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@neg_empty_1
-; CHECK-SAME: () #[[ATTR4]] {
+; CHECK-SAME: () #[[ATTR5:[0-9]+]] {
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: ret void
;
@@ -256,7 +256,7 @@ define void @neg_empty_1() "kernel" {
}
define void @neg_empty_2() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@neg_empty_2
-; CHECK-SAME: () #[[ATTR4]] {
+; CHECK-SAME: () #[[ATTR5]] {
; CHECK-NEXT: ret void
;
call void @aligned_barrier()
@@ -512,7 +512,7 @@ m:
define void @multiple_blocks_non_kernel_1(i1 %c0, i1 %c1) "kernel" {
; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_non_kernel_1
-; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]]) #[[ATTR4]] {
+; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]]) #[[ATTR5]] {
; CHECK-NEXT: br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]]
; CHECK: t0:
; CHECK-NEXT: br label [[T0B:%.*]]
@@ -553,7 +553,7 @@ m:
define void @multiple_blocks_non_kernel_2(i1 %c0, i1 %c1) "kernel" {
; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_non_kernel_2
-; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]]) #[[ATTR4]] {
+; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]]) #[[ATTR5]] {
; CHECK-NEXT: br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]]
; CHECK: t0:
; CHECK-NEXT: br label [[T0B:%.*]]
@@ -592,7 +592,7 @@ m:
define void @multiple_blocks_non_kernel_3(i1 %c0, i1 %c1) "kernel" {
; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_non_kernel_3
-; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]]) #[[ATTR4]] {
+; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]]) #[[ATTR5]] {
; CHECK-NEXT: br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]]
; CHECK: t0:
; CHECK-NEXT: br label [[T0B:%.*]]
@@ -629,7 +629,7 @@ m:
define void @multiple_blocks_non_kernel_effects_1(i1 %c0, i1 %c1, ptr %p) "kernel" {
; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_non_kernel_effects_1
-; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) #[[ATTR4]] {
+; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) #[[ATTR5]] {
; CHECK-NEXT: store i32 0, ptr [[P]], align 4
; CHECK-NEXT: call void @aligned_barrier()
; CHECK-NEXT: br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]]
@@ -952,7 +952,7 @@ define internal void @barrier_then_write_then_barrier2(ptr %p) {
}
define void @multiple_blocks_functions_non_kernel_effects_2(i1 %c0, i1 %c1, ptr %p) "kernel" {
; MODULE-LABEL: define {{[^@]+}}@multiple_blocks_functions_non_kernel_effects_2
-; MODULE-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) #[[ATTR4]] {
+; MODULE-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) #[[ATTR5]] {
; MODULE-NEXT: call void @barrier_then_write_then_barrier2(ptr [[P]])
; MODULE-NEXT: store i32 0, ptr [[P]], align 4
; MODULE-NEXT: br i1 [[C0]], label [[T03:%.*]], label [[F03:%.*]]
@@ -978,7 +978,7 @@ define void @multiple_blocks_functions_non_kernel_effects_2(i1 %c0, i1 %c1, ptr
; MODULE-NEXT: ret void
;
; CGSCC-LABEL: define {{[^@]+}}@multiple_blocks_functions_non_kernel_effects_2
-; CGSCC-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) #[[ATTR4]] {
+; CGSCC-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) #[[ATTR5]] {
; CGSCC-NEXT: call void @barrier_then_write_then_barrier2(ptr [[P]])
; CGSCC-NEXT: call void @aligned_barrier()
; CGSCC-NEXT: store i32 0, ptr [[P]], align 4
@@ -1270,77 +1270,21 @@ exit:
; MODULE: attributes #[[ATTR1:[0-9]+]] = { convergent nocallback nounwind }
; MODULE: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
; MODULE: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
-; MODULE: attributes #[[ATTR4]] = { "kernel" }
-; MODULE: attributes #[[ATTR5]] = { nosync memory(none) }
+; MODULE: attributes #[[ATTR4]] = { "kernel" "nvvm.kernel" }
+; MODULE: attributes #[[ATTR5]] = { "kernel" }
+; MODULE: attributes #[[ATTR6]] = { nosync memory(none) }
;.
; CGSCC: attributes #[[ATTR0]] = { "llvm.assume"="ompx_aligned_barrier" }
; CGSCC: attributes #[[ATTR1:[0-9]+]] = { convergent nocallback nounwind }
; CGSCC: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
; CGSCC: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
-; CGSCC: attributes #[[ATTR4]] = { "kernel" }
-; CGSCC: attributes #[[ATTR5]] = { nosync memory(none) }
+; CGSCC: attributes #[[ATTR4]] = { "kernel" "nvvm.kernel" }
+; CGSCC: attributes #[[ATTR5]] = { "kernel" }
+; CGSCC: attributes #[[ATTR6]] = { nosync memory(none) }
;.
; MODULE: [[META0:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; MODULE: [[META1:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; MODULE: [[META2:![0-9]+]] = !{ptr @pos_empty_1, !"kernel", i32 1}
-; MODULE: [[META3:![0-9]+]] = !{ptr @pos_empty_2, !"kernel", i32 1}
-; MODULE: [[META4:![0-9]+]] = !{ptr @pos_empty_3, !"kernel", i32 1}
-; MODULE: [[META5:![0-9]+]] = !{ptr @pos_empty_4, !"kernel", i32 1}
-; MODULE: [[META6:![0-9]+]] = !{ptr @pos_empty_5, !"kernel", i32 1}
-; MODULE: [[META7:![0-9]+]] = !{ptr @pos_empty_6, !"kernel", i32 1}
-; MODULE: [[META8:![0-9]+]] = !{ptr @neg_empty_8, !"kernel", i32 1}
-; MODULE: [[META9:![0-9]+]] = !{ptr @pos_constant_loads, !"kernel", i32 1}
-; MODULE: [[META10:![0-9]+]] = !{ptr @neg_loads, !"kernel", i32 1}
-; MODULE: [[META11:![0-9]+]] = !{ptr @pos_priv_mem, !"kernel", i32 1}
-; MODULE: [[META12:![0-9]+]] = !{ptr @neg_mem, !"kernel", i32 1}
-; MODULE: [[META13:![0-9]+]] = !{ptr @pos_multiple, !"kernel", i32 1}
-; MODULE: [[META14:![0-9]+]] = !{ptr @multiple_blocks_kernel_1, !"kernel", i32 1}
-; MODULE: [[META15:![0-9]+]] = !{ptr @multiple_blocks_kernel_2, !"kernel", i32 1}
-; MODULE: [[META16:![0-9]+]] = !{ptr @multiple_blocks_functions_kernel_effects_0, !"kernel", i32 1}
-; MODULE: [[META17:![0-9]+]] = !{ptr @pos_empty_7a, !"kernel", i32 1}
-; MODULE: [[META18:![0-9]+]] = !{ptr @pos_empty_7b, !"kernel", i32 1}
-; MODULE: [[META19:![0-9]+]] = !{ptr @neg_empty_9, !"kernel", i32 1}
-; MODULE: [[META20:![0-9]+]] = !{ptr @pos_empty_10, !"kernel", i32 1}
-; MODULE: [[META21:![0-9]+]] = !{ptr @pos_empty_11, !"kernel", i32 1}
-; MODULE: [[META22:![0-9]+]] = !{ptr @neg_empty_12, !"kernel", i32 1}
-; MODULE: [[META23:![0-9]+]] = !{ptr @pos_empty_8, !"kernel", i32 1}
-; MODULE: [[META24:![0-9]+]] = !{ptr @caller_barrier1, !"kernel", i32 1}
-; MODULE: [[META25:![0-9]+]] = !{ptr @caller_barrier2, !"kernel", i32 1}
-; MODULE: [[META26:![0-9]+]] = !{ptr @loop_barrier, !"kernel", i32 1}
-; MODULE: [[META27:![0-9]+]] = !{ptr @loop_barrier_end_barriers, !"kernel", i32 1}
-; MODULE: [[META28:![0-9]+]] = !{ptr @loop_barrier_end_barriers_unknown, !"kernel", i32 1}
-; MODULE: [[META29:![0-9]+]] = !{ptr @loop_barrier_store, !"kernel", i32 1}
-; MODULE: [[META30:![0-9]+]] = !{ptr @loop_barrier_end_barriers_store, !"kernel", i32 1}
;.
; CGSCC: [[META0:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; CGSCC: [[META1:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; CGSCC: [[META2:![0-9]+]] = !{ptr @pos_empty_1, !"kernel", i32 1}
-; CGSCC: [[META3:![0-9]+]] = !{ptr @pos_empty_2, !"kernel", i32 1}
-; CGSCC: [[META4:![0-9]+]] = !{ptr @pos_empty_3, !"kernel", i32 1}
-; CGSCC: [[META5:![0-9]+]] = !{ptr @pos_empty_4, !"kernel", i32 1}
-; CGSCC: [[META6:![0-9]+]] = !{ptr @pos_empty_5, !"kernel", i32 1}
-; CGSCC: [[META7:![0-9]+]] = !{ptr @pos_empty_6, !"kernel", i32 1}
-; CGSCC: [[META8:![0-9]+]] = !{ptr @neg_empty_8, !"kernel", i32 1}
-; CGSCC: [[META9:![0-9]+]] = !{ptr @pos_constant_loads, !"kernel", i32 1}
-; CGSCC: [[META10:![0-9]+]] = !{ptr @neg_loads, !"kernel", i32 1}
-; CGSCC: [[META11:![0-9]+]] = !{ptr @pos_priv_mem, !"kernel", i32 1}
-; CGSCC: [[META12:![0-9]+]] = !{ptr @neg_mem, !"kernel", i32 1}
-; CGSCC: [[META13:![0-9]+]] = !{ptr @pos_multiple, !"kernel", i32 1}
-; CGSCC: [[META14:![0-9]+]] = !{ptr @multiple_blocks_kernel_1, !"kernel", i32 1}
-; CGSCC: [[META15:![0-9]+]] = !{ptr @multiple_blocks_kernel_2, !"kernel", i32 1}
-; CGSCC: [[META16:![0-9]+]] = !{ptr @multiple_blocks_functions_kernel_effects_0, !"kernel", i32 1}
-; CGSCC: [[META17:![0-9]+]] = !{ptr @pos_empty_7a, !"kernel", i32 1}
-; CGSCC: [[META18:![0-9]+]] = !{ptr @pos_empty_7b, !"kernel", i32 1}
-; CGSCC: [[META19:![0-9]+]] = !{ptr @neg_empty_9, !"kernel", i32 1}
-; CGSCC: [[META20:![0-9]+]] = !{ptr @pos_empty_10, !"kernel", i32 1}
-; CGSCC: [[META21:![0-9]+]] = !{ptr @pos_empty_11, !"kernel", i32 1}
-; CGSCC: [[META22:![0-9]+]] = !{ptr @neg_empty_12, !"kernel", i32 1}
-; CGSCC: [[META23:![0-9]+]] = !{ptr @pos_empty_8, !"kernel", i32 1}
-; CGSCC: [[META24:![0-9]+]] = !{ptr @caller_barrier1, !"kernel", i32 1}
-; CGSCC: [[META25:![0-9]+]] = !{ptr @caller_barrier2, !"kernel", i32 1}
-; CGSCC: [[META26:![0-9]+]] = !{ptr @loop_barrier, !"kernel", i32 1}
-; CGSCC: [[META27:![0-9]+]] = !{ptr @loop_barrier_end_barriers, !"kernel", i32 1}
-; CGSCC: [[META28:![0-9]+]] = !{ptr @loop_barrier_end_barriers_unknown, !"kernel", i32 1}
-; CGSCC: [[META29:![0-9]+]] = !{ptr @loop_barrier_store, !"kernel", i32 1}
-; CGSCC: [[META30:![0-9]+]] = !{ptr @loop_barrier_end_barriers_store, !"kernel", i32 1}
;.
diff --git a/llvm/test/Transforms/OpenMP/bug66687.ll b/llvm/test/Transforms/OpenMP/bug66687.ll
index e0a9b825a88041..3872239d8f1ea5 100644
--- a/llvm/test/Transforms/OpenMP/bug66687.ll
+++ b/llvm/test/Transforms/OpenMP/bug66687.ll
@@ -14,7 +14,8 @@ define weak void @openmp_kernel() "kernel" {
}
define weak_odr void @non_openmp_kernel() {
-; CHECK-LABEL: define weak_odr void @non_openmp_kernel() {
+; CHECK-LABEL: define weak_odr void @non_openmp_kernel(
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: ret void
;
ret void
diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines.ll b/llvm/test/Transforms/OpenMP/custom_state_machines.ll
index e6ddf16f067634..47efdf7cea203c 100644
--- a/llvm/test/Transforms/OpenMP/custom_state_machines.ll
+++ b/llvm/test/Transforms/OpenMP/custom_state_machines.ll
@@ -919,8 +919,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU: user_code.entry:
-; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
-; AMDGPU-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
+; AMDGPU-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
@@ -934,35 +934,35 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
-; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
+; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10:[0-9]+]]
+; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR11:[0-9]+]]
; AMDGPU-NEXT: ret void
;
;
; AMDGPU: Function Attrs: convergent noinline nounwind
; AMDGPU-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
-; AMDGPU-SAME: () #[[ATTR1:[0-9]+]] {
+; AMDGPU-SAME: () #[[ATTR2:[0-9]+]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
+; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR4]]
; AMDGPU-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; AMDGPU-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
; AMDGPU: omp_if.then:
; AMDGPU-NEXT: store i32 0, ptr @G, align 4
-; AMDGPU-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR4]]
; AMDGPU-NEXT: br label [[OMP_IF_END]]
; AMDGPU: omp_if.end:
-; AMDGPU-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
;
; AMDGPU: Function Attrs: convergent noinline nounwind
; AMDGPU-LABEL: define {{[^@]+}}@no_parallel_region_in_here
-; AMDGPU-SAME: () #[[ATTR1]] {
+; AMDGPU-SAME: () #[[ATTR2]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]])
@@ -1027,8 +1027,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU: user_code.entry:
-; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
@@ -1037,61 +1037,61 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
+; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
+; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; AMDGPU-NEXT: ret void
;
;
; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
+; AMDGPU-NEXT: call void @p0() #[[ATTR12:[0-9]+]]
; AMDGPU-NEXT: ret void
;
;
; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
-; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
;
; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @p1() #[[ATTR11]]
+; AMDGPU-NEXT: call void @p1() #[[ATTR12]]
; AMDGPU-NEXT: ret void
;
;
; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
-; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
;
@@ -1151,8 +1151,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU: user_code.entry:
-; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
@@ -1161,31 +1161,31 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__4
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
-; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
+; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
+; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR10]]
+; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]]
+; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR10]]
; AMDGPU-NEXT: ret void
;
;
; AMDGPU: Function Attrs: noinline nounwind
; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
-; AMDGPU-SAME: () #[[ATTR6:[0-9]+]] {
+; AMDGPU-SAME: () #[[ATTR7:[0-9]+]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-NEXT: ret void
;
;
; AMDGPU: Function Attrs: convergent noinline nounwind
; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
-; AMDGPU-SAME: () #[[ATTR1]] {
+; AMDGPU-SAME: () #[[ATTR2]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
@@ -1195,40 +1195,40 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @p1() #[[ATTR11]]
+; AMDGPU-NEXT: call void @p1() #[[ATTR12]]
; AMDGPU-NEXT: ret void
;
;
; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
-; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
;
; AMDGPU: Function Attrs: noinline nounwind
; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
-; AMDGPU-SAME: () #[[ATTR6]] {
+; AMDGPU-SAME: () #[[ATTR7]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-NEXT: ret void
;
;
; AMDGPU: Function Attrs: convergent noinline nounwind
; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
-; AMDGPU-SAME: () #[[ATTR1]] {
+; AMDGPU-SAME: () #[[ATTR2]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
@@ -1288,8 +1288,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU: user_code.entry:
-; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
@@ -1298,60 +1298,60 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__6
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
+; AMDGPU-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR12]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; AMDGPU-NEXT: ret void
;
;
; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @p0() #[[ATTR11]]
+; AMDGPU-NEXT: call void @p0() #[[ATTR12]]
; AMDGPU-NEXT: ret void
;
;
; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
-; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
;
; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__8
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @p1() #[[ATTR11]]
+; AMDGPU-NEXT: call void @p1() #[[ATTR12]]
; AMDGPU-NEXT: ret void
;
;
; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
-; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
;
@@ -1405,8 +1405,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU: user_code.entry:
-; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
@@ -1415,60 +1415,60 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
+; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; AMDGPU-NEXT: ret void
;
;
; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__10
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @p0() #[[ATTR11]]
+; AMDGPU-NEXT: call void @p0() #[[ATTR12]]
; AMDGPU-NEXT: ret void
;
;
; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
-; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
;
; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__11
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @p1() #[[ATTR11]]
+; AMDGPU-NEXT: call void @p1() #[[ATTR12]]
; AMDGPU-NEXT: ret void
;
;
; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
-; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
;
@@ -1522,8 +1522,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU: user_code.entry:
-; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
@@ -1532,12 +1532,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__12
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
+; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; AMDGPU-NEXT: ret void
@@ -1545,47 +1545,47 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__13
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @p0() #[[ATTR11]]
+; AMDGPU-NEXT: call void @p0() #[[ATTR12]]
; AMDGPU-NEXT: ret void
;
;
; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
-; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
;
; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__14
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @p1() #[[ATTR11]]
+; AMDGPU-NEXT: call void @p1() #[[ATTR12]]
; AMDGPU-NEXT: ret void
;
;
; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
-; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
;
@@ -1599,8 +1599,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU: user_code.entry:
-; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
@@ -1609,18 +1609,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__15
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
-; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
+; AMDGPU-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR10]]
+; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR10]]
; AMDGPU-NEXT: ret void
;
;
; AMDGPU: Function Attrs: noinline nounwind
; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
-; AMDGPU-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
+; AMDGPU-SAME: (i32 [[A:%.*]]) #[[ATTR7]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
@@ -1632,8 +1632,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU: if.end:
; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
; AMDGPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
-; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
+; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR10]]
+; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR10]]
; AMDGPU-NEXT: br label [[RETURN]]
; AMDGPU: return:
; AMDGPU-NEXT: ret void
@@ -1641,7 +1641,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU: Function Attrs: convergent noinline nounwind
; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
-; AMDGPU-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
+; AMDGPU-SAME: (i32 [[A:%.*]]) #[[ATTR2]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
@@ -1653,8 +1653,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU: if.end:
; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
; AMDGPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]]
-; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]]
+; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR12]]
+; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR12]]
; AMDGPU-NEXT: br label [[RETURN]]
; AMDGPU: return:
; AMDGPU-NEXT: ret void
@@ -1700,8 +1700,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU: user_code.entry:
-; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
@@ -1710,80 +1710,80 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__16
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @weak_callee_empty() #[[ATTR9]]
+; AMDGPU-NEXT: call void @weak_callee_empty() #[[ATTR10]]
; AMDGPU-NEXT: ret void
;
;
; AMDGPU: Function Attrs: convergent noinline nounwind
; AMDGPU-LABEL: define {{[^@]+}}@weak_callee_empty
-; AMDGPU-SAME: () #[[ATTR1]] {
+; AMDGPU-SAME: () #[[ATTR2]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: ret void
;
;
; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__17
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @p0() #[[ATTR11]]
+; AMDGPU-NEXT: call void @p0() #[[ATTR12]]
; AMDGPU-NEXT: ret void
;
;
; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
-; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
;
; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__18
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @p0() #[[ATTR11]]
+; AMDGPU-NEXT: call void @p0() #[[ATTR12]]
; AMDGPU-NEXT: ret void
;
;
; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
-; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
;
; AMDGPU: Function Attrs: noinline nounwind
; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
-; AMDGPU-SAME: () #[[ATTR6]] {
+; AMDGPU-SAME: () #[[ATTR7]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-NEXT: ret void
;
;
; AMDGPU: Function Attrs: convergent noinline nounwind
; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
-; AMDGPU-SAME: () #[[ATTR1]] {
+; AMDGPU-SAME: () #[[ATTR2]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
@@ -1793,24 +1793,24 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__19
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @p0() #[[ATTR11]]
+; AMDGPU-NEXT: call void @p0() #[[ATTR12]]
; AMDGPU-NEXT: ret void
;
;
; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
-; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
;
@@ -1824,8 +1824,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX: user_code.entry:
-; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
-; NVPTX-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
+; NVPTX-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
@@ -1839,35 +1839,35 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
-; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
+; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10:[0-9]+]]
+; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR11:[0-9]+]]
; NVPTX-NEXT: ret void
;
;
; NVPTX: Function Attrs: convergent noinline nounwind
; NVPTX-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
-; NVPTX-SAME: () #[[ATTR1:[0-9]+]] {
+; NVPTX-SAME: () #[[ATTR2:[0-9]+]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
+; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR4]]
; NVPTX-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; NVPTX-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
; NVPTX: omp_if.then:
; NVPTX-NEXT: store i32 0, ptr @G, align 4
-; NVPTX-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR4]]
; NVPTX-NEXT: br label [[OMP_IF_END]]
; NVPTX: omp_if.end:
-; NVPTX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
;
; NVPTX: Function Attrs: convergent noinline nounwind
; NVPTX-LABEL: define {{[^@]+}}@no_parallel_region_in_here
-; NVPTX-SAME: () #[[ATTR1]] {
+; NVPTX-SAME: () #[[ATTR2]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]])
@@ -1931,8 +1931,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX: user_code.entry:
-; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
@@ -1941,61 +1941,61 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
+; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
+; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; NVPTX-NEXT: ret void
;
;
; NVPTX: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
+; NVPTX-NEXT: call void @p0() #[[ATTR12:[0-9]+]]
; NVPTX-NEXT: ret void
;
;
; NVPTX: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
-; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
;
; NVPTX: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @p1() #[[ATTR11]]
+; NVPTX-NEXT: call void @p1() #[[ATTR12]]
; NVPTX-NEXT: ret void
;
;
; NVPTX: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
-; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
;
@@ -2054,8 +2054,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX: user_code.entry:
-; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
@@ -2064,31 +2064,31 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__4
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; NVPTX-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
-; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
+; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
+; NVPTX-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR10]]
+; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]]
+; NVPTX-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR10]]
; NVPTX-NEXT: ret void
;
;
; NVPTX: Function Attrs: noinline nounwind
; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
-; NVPTX-SAME: () #[[ATTR6:[0-9]+]] {
+; NVPTX-SAME: () #[[ATTR7:[0-9]+]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-NEXT: ret void
;
;
; NVPTX: Function Attrs: convergent noinline nounwind
; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
-; NVPTX-SAME: () #[[ATTR1]] {
+; NVPTX-SAME: () #[[ATTR2]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
@@ -2098,40 +2098,40 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @p1() #[[ATTR11]]
+; NVPTX-NEXT: call void @p1() #[[ATTR12]]
; NVPTX-NEXT: ret void
;
;
; NVPTX: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
-; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
;
; NVPTX: Function Attrs: noinline nounwind
; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
-; NVPTX-SAME: () #[[ATTR6]] {
+; NVPTX-SAME: () #[[ATTR7]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-NEXT: ret void
;
;
; NVPTX: Function Attrs: convergent noinline nounwind
; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
-; NVPTX-SAME: () #[[ATTR1]] {
+; NVPTX-SAME: () #[[ATTR2]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
@@ -2190,8 +2190,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX: user_code.entry:
-; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
@@ -2200,60 +2200,60 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__6
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
+; NVPTX-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR12]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; NVPTX-NEXT: ret void
;
;
; NVPTX: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @p0() #[[ATTR11]]
+; NVPTX-NEXT: call void @p0() #[[ATTR12]]
; NVPTX-NEXT: ret void
;
;
; NVPTX: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
-; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
;
; NVPTX: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__8
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @p1() #[[ATTR11]]
+; NVPTX-NEXT: call void @p1() #[[ATTR12]]
; NVPTX-NEXT: ret void
;
;
; NVPTX: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
-; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
;
@@ -2306,8 +2306,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX: user_code.entry:
-; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
@@ -2316,60 +2316,60 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
+; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; NVPTX-NEXT: ret void
;
;
; NVPTX: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__10
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @p0() #[[ATTR11]]
+; NVPTX-NEXT: call void @p0() #[[ATTR12]]
; NVPTX-NEXT: ret void
;
;
; NVPTX: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
-; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
;
; NVPTX: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__11
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @p1() #[[ATTR11]]
+; NVPTX-NEXT: call void @p1() #[[ATTR12]]
; NVPTX-NEXT: ret void
;
;
; NVPTX: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
-; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
;
@@ -2422,8 +2422,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX: user_code.entry:
-; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
@@ -2432,12 +2432,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__12
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
+; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; NVPTX-NEXT: ret void
@@ -2445,47 +2445,47 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__13
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @p0() #[[ATTR11]]
+; NVPTX-NEXT: call void @p0() #[[ATTR12]]
; NVPTX-NEXT: ret void
;
;
; NVPTX: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
-; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
;
; NVPTX: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__14
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @p1() #[[ATTR11]]
+; NVPTX-NEXT: call void @p1() #[[ATTR12]]
; NVPTX-NEXT: ret void
;
;
; NVPTX: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
-; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
;
@@ -2499,8 +2499,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX: user_code.entry:
-; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
@@ -2509,18 +2509,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__15
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
-; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
+; NVPTX-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR10]]
+; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR10]]
; NVPTX-NEXT: ret void
;
;
; NVPTX: Function Attrs: noinline nounwind
; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
-; NVPTX-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
+; NVPTX-SAME: (i32 [[A:%.*]]) #[[ATTR7]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
@@ -2532,8 +2532,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX: if.end:
; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
; NVPTX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
-; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
+; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR10]]
+; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR10]]
; NVPTX-NEXT: br label [[RETURN]]
; NVPTX: return:
; NVPTX-NEXT: ret void
@@ -2541,7 +2541,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX: Function Attrs: convergent noinline nounwind
; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
-; NVPTX-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
+; NVPTX-SAME: (i32 [[A:%.*]]) #[[ATTR2]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
@@ -2553,8 +2553,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX: if.end:
; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
; NVPTX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]]
-; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]]
+; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR12]]
+; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR12]]
; NVPTX-NEXT: br label [[RETURN]]
; NVPTX: return:
; NVPTX-NEXT: ret void
@@ -2599,8 +2599,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX: user_code.entry:
-; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
@@ -2609,80 +2609,80 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__16
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @weak_callee_empty() #[[ATTR9]]
+; NVPTX-NEXT: call void @weak_callee_empty() #[[ATTR10]]
; NVPTX-NEXT: ret void
;
;
; NVPTX: Function Attrs: convergent noinline nounwind
; NVPTX-LABEL: define {{[^@]+}}@weak_callee_empty
-; NVPTX-SAME: () #[[ATTR1]] {
+; NVPTX-SAME: () #[[ATTR2]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: ret void
;
;
; NVPTX: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__17
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @p0() #[[ATTR11]]
+; NVPTX-NEXT: call void @p0() #[[ATTR12]]
; NVPTX-NEXT: ret void
;
;
; NVPTX: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
-; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
;
; NVPTX: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__18
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @p0() #[[ATTR11]]
+; NVPTX-NEXT: call void @p0() #[[ATTR12]]
; NVPTX-NEXT: ret void
;
;
; NVPTX: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
-; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
;
; NVPTX: Function Attrs: noinline nounwind
; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
-; NVPTX-SAME: () #[[ATTR6]] {
+; NVPTX-SAME: () #[[ATTR7]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-NEXT: ret void
;
;
; NVPTX: Function Attrs: convergent noinline nounwind
; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
-; NVPTX-SAME: () #[[ATTR1]] {
+; NVPTX-SAME: () #[[ATTR2]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
@@ -2692,24 +2692,24 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__19
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @p0() #[[ATTR11]]
+; NVPTX-NEXT: call void @p0() #[[ATTR12]]
; NVPTX-NEXT: ret void
;
;
; NVPTX: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
-; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
;
@@ -2723,8 +2723,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
-; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: worker.exit:
@@ -2738,35 +2738,35 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__
-; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
-; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
+; AMDGPU-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10:[0-9]+]]
+; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR11:[0-9]+]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
-; AMDGPU-DISABLED-SAME: () #[[ATTR1:[0-9]+]] {
+; AMDGPU-DISABLED-SAME: () #[[ATTR2:[0-9]+]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; AMDGPU-DISABLED-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
; AMDGPU-DISABLED: omp_if.then:
; AMDGPU-DISABLED-NEXT: store i32 0, ptr @G, align 4
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: br label [[OMP_IF_END]]
; AMDGPU-DISABLED: omp_if.end:
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here
-; AMDGPU-DISABLED-SAME: () #[[ATTR1]] {
+; AMDGPU-DISABLED-SAME: () #[[ATTR2]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]])
@@ -2791,8 +2791,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
-; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: worker.exit:
@@ -2801,61 +2801,61 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1
-; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
+; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
+; AMDGPU-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; AMDGPU-DISABLED-NEXT: ret void
;
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2
-; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
+; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR12:[0-9]+]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
-; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3
-; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR11]]
+; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR12]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
-; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
@@ -2869,8 +2869,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
-; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: worker.exit:
@@ -2879,31 +2879,31 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4
-; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
-; AMDGPU-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
+; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
+; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR10]]
+; AMDGPU-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]]
+; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR10]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
; AMDGPU-DISABLED: Function Attrs: noinline nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
-; AMDGPU-DISABLED-SAME: () #[[ATTR6:[0-9]+]] {
+; AMDGPU-DISABLED-SAME: () #[[ATTR7:[0-9]+]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-DISABLED-NEXT: ret void
;
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
-; AMDGPU-DISABLED-SAME: () #[[ATTR1]] {
+; AMDGPU-DISABLED-SAME: () #[[ATTR2]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
@@ -2913,40 +2913,40 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5
-; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR11]]
+; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR12]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
-; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
; AMDGPU-DISABLED: Function Attrs: noinline nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
-; AMDGPU-DISABLED-SAME: () #[[ATTR6]] {
+; AMDGPU-DISABLED-SAME: () #[[ATTR7]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-DISABLED-NEXT: ret void
;
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
-; AMDGPU-DISABLED-SAME: () #[[ATTR1]] {
+; AMDGPU-DISABLED-SAME: () #[[ATTR2]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
@@ -2964,8 +2964,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
-; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: worker.exit:
@@ -2974,60 +2974,60 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6
-; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU-DISABLED-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
+; AMDGPU-DISABLED-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR12]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; AMDGPU-DISABLED-NEXT: ret void
;
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7
-; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR11]]
+; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR12]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
-; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8
-; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR11]]
+; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR12]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
-; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
@@ -3041,8 +3041,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
-; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: worker.exit:
@@ -3051,60 +3051,60 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9
-; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
+; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; AMDGPU-DISABLED-NEXT: ret void
;
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__10
-; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR11]]
+; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR12]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
-; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__11
-; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR11]]
+; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR12]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
-; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
@@ -3118,8 +3118,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
-; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: worker.exit:
@@ -3128,12 +3128,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__12
-; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
+; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; AMDGPU-DISABLED-NEXT: ret void
@@ -3141,47 +3141,47 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__13
-; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR11]]
+; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR12]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
-; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__14
-; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR11]]
+; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR12]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
-; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
@@ -3195,8 +3195,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
-; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: worker.exit:
@@ -3205,18 +3205,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__15
-; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
-; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
+; AMDGPU-DISABLED-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR10]]
+; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR10]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
; AMDGPU-DISABLED: Function Attrs: noinline nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
-; AMDGPU-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
+; AMDGPU-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR7]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
@@ -3228,8 +3228,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED: if.end:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
; AMDGPU-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
-; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
+; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR10]]
+; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR10]]
; AMDGPU-DISABLED-NEXT: br label [[RETURN]]
; AMDGPU-DISABLED: return:
; AMDGPU-DISABLED-NEXT: ret void
@@ -3237,7 +3237,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
-; AMDGPU-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
+; AMDGPU-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR2]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
@@ -3249,8 +3249,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED: if.end:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
; AMDGPU-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]]
-; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]]
+; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR12]]
+; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR12]]
; AMDGPU-DISABLED-NEXT: br label [[RETURN]]
; AMDGPU-DISABLED: return:
; AMDGPU-DISABLED-NEXT: ret void
@@ -3266,8 +3266,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
-; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: worker.exit:
@@ -3276,80 +3276,80 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__16
-; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @weak_callee_empty() #[[ATTR9]]
+; AMDGPU-DISABLED-NEXT: call void @weak_callee_empty() #[[ATTR10]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@weak_callee_empty
-; AMDGPU-DISABLED-SAME: () #[[ATTR1]] {
+; AMDGPU-DISABLED-SAME: () #[[ATTR2]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: ret void
;
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__17
-; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR11]]
+; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR12]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
-; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__18
-; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR11]]
+; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR12]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
-; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
; AMDGPU-DISABLED: Function Attrs: noinline nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
-; AMDGPU-DISABLED-SAME: () #[[ATTR6]] {
+; AMDGPU-DISABLED-SAME: () #[[ATTR7]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-DISABLED-NEXT: ret void
;
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
-; AMDGPU-DISABLED-SAME: () #[[ATTR1]] {
+; AMDGPU-DISABLED-SAME: () #[[ATTR2]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
@@ -3359,24 +3359,24 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__19
-; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR11]]
+; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR12]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
-; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
@@ -3390,8 +3390,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
-; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: worker.exit:
@@ -3405,35 +3405,35 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__
-; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
-; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
+; NVPTX-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10:[0-9]+]]
+; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR11:[0-9]+]]
; NVPTX-DISABLED-NEXT: ret void
;
;
; NVPTX-DISABLED: Function Attrs: convergent noinline nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
-; NVPTX-DISABLED-SAME: () #[[ATTR1:[0-9]+]] {
+; NVPTX-DISABLED-SAME: () #[[ATTR2:[0-9]+]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; NVPTX-DISABLED-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
; NVPTX-DISABLED: omp_if.then:
; NVPTX-DISABLED-NEXT: store i32 0, ptr @G, align 4
-; NVPTX-DISABLED-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: br label [[OMP_IF_END]]
; NVPTX-DISABLED: omp_if.end:
-; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: ret void
;
;
; NVPTX-DISABLED: Function Attrs: convergent noinline nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here
-; NVPTX-DISABLED-SAME: () #[[ATTR1]] {
+; NVPTX-DISABLED-SAME: () #[[ATTR2]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]])
@@ -3458,8 +3458,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
-; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: worker.exit:
@@ -3468,61 +3468,61 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1
-; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
+; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
+; NVPTX-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; NVPTX-DISABLED-NEXT: ret void
;
;
; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2
-; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
+; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR12:[0-9]+]]
; NVPTX-DISABLED-NEXT: ret void
;
;
; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
-; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: ret void
;
;
; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3
-; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR11]]
+; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR12]]
; NVPTX-DISABLED-NEXT: ret void
;
;
; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
-; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: ret void
;
;
@@ -3536,8 +3536,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
-; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: worker.exit:
@@ -3546,31 +3546,31 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4
-; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
-; NVPTX-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
+; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
+; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR10]]
+; NVPTX-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]]
+; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR10]]
; NVPTX-DISABLED-NEXT: ret void
;
;
; NVPTX-DISABLED: Function Attrs: noinline nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
-; NVPTX-DISABLED-SAME: () #[[ATTR6:[0-9]+]] {
+; NVPTX-DISABLED-SAME: () #[[ATTR7:[0-9]+]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-DISABLED-NEXT: ret void
;
;
; NVPTX-DISABLED: Function Attrs: convergent noinline nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
-; NVPTX-DISABLED-SAME: () #[[ATTR1]] {
+; NVPTX-DISABLED-SAME: () #[[ATTR2]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
@@ -3580,40 +3580,40 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5
-; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR11]]
+; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR12]]
; NVPTX-DISABLED-NEXT: ret void
;
;
; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
-; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: ret void
;
;
; NVPTX-DISABLED: Function Attrs: noinline nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
-; NVPTX-DISABLED-SAME: () #[[ATTR6]] {
+; NVPTX-DISABLED-SAME: () #[[ATTR7]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-DISABLED-NEXT: ret void
;
;
; NVPTX-DISABLED: Function Attrs: convergent noinline nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
-; NVPTX-DISABLED-SAME: () #[[ATTR1]] {
+; NVPTX-DISABLED-SAME: () #[[ATTR2]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
@@ -3631,8 +3631,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
-; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: worker.exit:
@@ -3641,60 +3641,60 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6
-; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX-DISABLED-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
+; NVPTX-DISABLED-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR12]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; NVPTX-DISABLED-NEXT: ret void
;
;
; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7
-; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR11]]
+; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR12]]
; NVPTX-DISABLED-NEXT: ret void
;
;
; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
-; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: ret void
;
;
; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8
-; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR11]]
+; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR12]]
; NVPTX-DISABLED-NEXT: ret void
;
;
; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
-; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: ret void
;
;
@@ -3708,8 +3708,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
-; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: worker.exit:
@@ -3718,60 +3718,60 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9
-; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
+; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; NVPTX-DISABLED-NEXT: ret void
;
;
; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__10
-; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR11]]
+; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR12]]
; NVPTX-DISABLED-NEXT: ret void
;
;
; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
-; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: ret void
;
;
; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__11
-; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR11]]
+; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR12]]
; NVPTX-DISABLED-NEXT: ret void
;
;
; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
-; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: ret void
;
;
@@ -3785,8 +3785,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
-; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: worker.exit:
@@ -3795,12 +3795,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__12
-; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
+; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; NVPTX-DISABLED-NEXT: ret void
@@ -3808,47 +3808,47 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__13
-; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR11]]
+; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR12]]
; NVPTX-DISABLED-NEXT: ret void
;
;
; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
-; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: ret void
;
;
; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__14
-; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR11]]
+; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR12]]
; NVPTX-DISABLED-NEXT: ret void
;
;
; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
-; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: ret void
;
;
@@ -3862,8 +3862,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
-; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: worker.exit:
@@ -3872,18 +3872,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__15
-; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
-; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
+; NVPTX-DISABLED-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR10]]
+; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR10]]
; NVPTX-DISABLED-NEXT: ret void
;
;
; NVPTX-DISABLED: Function Attrs: noinline nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
-; NVPTX-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
+; NVPTX-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR7]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
@@ -3895,8 +3895,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED: if.end:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
; NVPTX-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
-; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
+; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR10]]
+; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR10]]
; NVPTX-DISABLED-NEXT: br label [[RETURN]]
; NVPTX-DISABLED: return:
; NVPTX-DISABLED-NEXT: ret void
@@ -3904,7 +3904,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX-DISABLED: Function Attrs: convergent noinline nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
-; NVPTX-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
+; NVPTX-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR2]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
@@ -3916,8 +3916,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED: if.end:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
; NVPTX-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]]
-; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]]
+; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR12]]
+; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR12]]
; NVPTX-DISABLED-NEXT: br label [[RETURN]]
; NVPTX-DISABLED: return:
; NVPTX-DISABLED-NEXT: ret void
@@ -3933,8 +3933,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
-; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: worker.exit:
@@ -3943,80 +3943,80 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__16
-; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @weak_callee_empty() #[[ATTR9]]
+; NVPTX-DISABLED-NEXT: call void @weak_callee_empty() #[[ATTR10]]
; NVPTX-DISABLED-NEXT: ret void
;
;
; NVPTX-DISABLED: Function Attrs: convergent noinline nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@weak_callee_empty
-; NVPTX-DISABLED-SAME: () #[[ATTR1]] {
+; NVPTX-DISABLED-SAME: () #[[ATTR2]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: ret void
;
;
; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__17
-; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR11]]
+; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR12]]
; NVPTX-DISABLED-NEXT: ret void
;
;
; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
-; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: ret void
;
;
; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__18
-; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR11]]
+; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR12]]
; NVPTX-DISABLED-NEXT: ret void
;
;
; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
-; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: ret void
;
;
; NVPTX-DISABLED: Function Attrs: noinline nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
-; NVPTX-DISABLED-SAME: () #[[ATTR6]] {
+; NVPTX-DISABLED-SAME: () #[[ATTR7]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-DISABLED-NEXT: ret void
;
;
; NVPTX-DISABLED: Function Attrs: convergent noinline nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
-; NVPTX-DISABLED-SAME: () #[[ATTR1]] {
+; NVPTX-DISABLED-SAME: () #[[ATTR2]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
@@ -4026,78 +4026,82 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__19
-; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR11]]
+; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR12]]
; NVPTX-DISABLED-NEXT: ret void
;
;
; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
-; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: ret void
;
;.
-; AMDGPU: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU: attributes #[[ATTR3]] = { nounwind }
-; AMDGPU: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU: attributes #[[ATTR5:[0-9]+]] = { alwaysinline }
-; AMDGPU: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU: attributes #[[ATTR9]] = { convergent nounwind }
-; AMDGPU: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" }
-; AMDGPU: attributes #[[ATTR11]] = { convergent }
+; AMDGPU: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "nvvm.kernel" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU: attributes #[[ATTR1]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU: attributes #[[ATTR2]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU: attributes #[[ATTR3:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU: attributes #[[ATTR4]] = { nounwind }
+; AMDGPU: attributes #[[ATTR5:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU: attributes #[[ATTR6:[0-9]+]] = { alwaysinline }
+; AMDGPU: attributes #[[ATTR7]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU: attributes #[[ATTR9:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU: attributes #[[ATTR10]] = { convergent nounwind }
+; AMDGPU: attributes #[[ATTR11]] = { convergent "llvm.assume"="omp_no_openmp" }
+; AMDGPU: attributes #[[ATTR12]] = { convergent }
;.
-; NVPTX: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX: attributes #[[ATTR3]] = { nounwind }
-; NVPTX: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX: attributes #[[ATTR5:[0-9]+]] = { alwaysinline }
-; NVPTX: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX: attributes #[[ATTR9]] = { convergent nounwind }
-; NVPTX: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" }
-; NVPTX: attributes #[[ATTR11]] = { convergent }
+; NVPTX: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "nvvm.kernel" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX: attributes #[[ATTR1]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX: attributes #[[ATTR2]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX: attributes #[[ATTR3:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX: attributes #[[ATTR4]] = { nounwind }
+; NVPTX: attributes #[[ATTR5:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX: attributes #[[ATTR6:[0-9]+]] = { alwaysinline }
+; NVPTX: attributes #[[ATTR7]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX: attributes #[[ATTR9:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX: attributes #[[ATTR10]] = { convergent nounwind }
+; NVPTX: attributes #[[ATTR11]] = { convergent "llvm.assume"="omp_no_openmp" }
+; NVPTX: attributes #[[ATTR12]] = { convergent }
;.
-; AMDGPU-DISABLED: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU-DISABLED: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU-DISABLED: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU-DISABLED: attributes #[[ATTR3]] = { nounwind }
-; AMDGPU-DISABLED: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU-DISABLED: attributes #[[ATTR5:[0-9]+]] = { alwaysinline }
-; AMDGPU-DISABLED: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU-DISABLED: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU-DISABLED: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU-DISABLED: attributes #[[ATTR9]] = { convergent nounwind }
-; AMDGPU-DISABLED: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" }
-; AMDGPU-DISABLED: attributes #[[ATTR11]] = { convergent }
+; AMDGPU-DISABLED: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "nvvm.kernel" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU-DISABLED: attributes #[[ATTR1]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU-DISABLED: attributes #[[ATTR2]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU-DISABLED: attributes #[[ATTR3:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU-DISABLED: attributes #[[ATTR4]] = { nounwind }
+; AMDGPU-DISABLED: attributes #[[ATTR5:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU-DISABLED: attributes #[[ATTR6:[0-9]+]] = { alwaysinline }
+; AMDGPU-DISABLED: attributes #[[ATTR7]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU-DISABLED: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU-DISABLED: attributes #[[ATTR9:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU-DISABLED: attributes #[[ATTR10]] = { convergent nounwind }
+; AMDGPU-DISABLED: attributes #[[ATTR11]] = { convergent "llvm.assume"="omp_no_openmp" }
+; AMDGPU-DISABLED: attributes #[[ATTR12]] = { convergent }
;.
-; NVPTX-DISABLED: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX-DISABLED: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX-DISABLED: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX-DISABLED: attributes #[[ATTR3]] = { nounwind }
-; NVPTX-DISABLED: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX-DISABLED: attributes #[[ATTR5:[0-9]+]] = { alwaysinline }
-; NVPTX-DISABLED: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX-DISABLED: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX-DISABLED: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX-DISABLED: attributes #[[ATTR9]] = { convergent nounwind }
-; NVPTX-DISABLED: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" }
-; NVPTX-DISABLED: attributes #[[ATTR11]] = { convergent }
+; NVPTX-DISABLED: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "nvvm.kernel" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX-DISABLED: attributes #[[ATTR1]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX-DISABLED: attributes #[[ATTR2]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX-DISABLED: attributes #[[ATTR3:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX-DISABLED: attributes #[[ATTR4]] = { nounwind }
+; NVPTX-DISABLED: attributes #[[ATTR5:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX-DISABLED: attributes #[[ATTR6:[0-9]+]] = { alwaysinline }
+; NVPTX-DISABLED: attributes #[[ATTR7]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX-DISABLED: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX-DISABLED: attributes #[[ATTR9:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX-DISABLED: attributes #[[ATTR10]] = { convergent nounwind }
+; NVPTX-DISABLED: attributes #[[ATTR11]] = { convergent "llvm.assume"="omp_no_openmp" }
+; NVPTX-DISABLED: attributes #[[ATTR12]] = { convergent }
;.
; AMDGPU: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
; AMDGPU: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
@@ -4107,17 +4111,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
; AMDGPU: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
; AMDGPU: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
-; AMDGPU: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1}
-; AMDGPU: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1}
-; AMDGPU: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1}
-; AMDGPU: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1}
-; AMDGPU: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1}
-; AMDGPU: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1}
-; AMDGPU: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1}
-; AMDGPU: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1}
-; AMDGPU: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; AMDGPU: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; AMDGPU: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; AMDGPU: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; AMDGPU: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; AMDGPU: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
;.
; NVPTX: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
; NVPTX: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
@@ -4127,17 +4123,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
; NVPTX: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
; NVPTX: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
-; NVPTX: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1}
-; NVPTX: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1}
-; NVPTX: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1}
-; NVPTX: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1}
-; NVPTX: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1}
-; NVPTX: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1}
-; NVPTX: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1}
-; NVPTX: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1}
-; NVPTX: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; NVPTX: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; NVPTX: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; NVPTX: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; NVPTX: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; NVPTX: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
;.
; AMDGPU-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
; AMDGPU-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
@@ -4147,17 +4135,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
; AMDGPU-DISABLED: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
; AMDGPU-DISABLED: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
-; AMDGPU-DISABLED: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; AMDGPU-DISABLED: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; AMDGPU-DISABLED: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; AMDGPU-DISABLED: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; AMDGPU-DISABLED: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; AMDGPU-DISABLED: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
;.
; NVPTX-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
; NVPTX-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
@@ -4167,15 +4147,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
; NVPTX-DISABLED: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
; NVPTX-DISABLED: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
-; NVPTX-DISABLED: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; NVPTX-DISABLED: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; NVPTX-DISABLED: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; NVPTX-DISABLED: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; NVPTX-DISABLED: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; NVPTX-DISABLED: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
;.
diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll b/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll
index d20821d450365b..478b267a90f89c 100644
--- a/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll
+++ b/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll
@@ -928,8 +928,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU1: user_code.entry:
-; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
-; AMDGPU1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
+; AMDGPU1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU1-NEXT: ret void
; AMDGPU1: worker.exit:
@@ -938,35 +938,35 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
-; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
+; AMDGPU1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10:[0-9]+]]
+; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR11:[0-9]+]]
; AMDGPU1-NEXT: ret void
;
;
; AMDGPU1: Function Attrs: convergent noinline nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
-; AMDGPU1-SAME: () #[[ATTR1:[0-9]+]] {
+; AMDGPU1-SAME: () #[[ATTR2:[0-9]+]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
+; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
+; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR4]]
; AMDGPU1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; AMDGPU1-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
; AMDGPU1: omp_if.then:
; AMDGPU1-NEXT: store i32 0, ptr @G, align 4
-; AMDGPU1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
+; AMDGPU1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR4]]
; AMDGPU1-NEXT: br label [[OMP_IF_END]]
; AMDGPU1: omp_if.end:
-; AMDGPU1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]]
+; AMDGPU1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR4]]
; AMDGPU1-NEXT: ret void
;
;
; AMDGPU1: Function Attrs: convergent noinline nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@no_parallel_region_in_here
-; AMDGPU1-SAME: () #[[ATTR1]] {
+; AMDGPU1-SAME: () #[[ATTR2]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]])
@@ -991,8 +991,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU1: user_code.entry:
-; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU1-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU1-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU1-NEXT: ret void
; AMDGPU1: worker.exit:
@@ -1001,61 +1001,61 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__1
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
+; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
+; AMDGPU1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]]
; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; AMDGPU1-NEXT: ret void
;
;
; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__2
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
+; AMDGPU1-NEXT: call void @p0() #[[ATTR12:[0-9]+]]
; AMDGPU1-NEXT: ret void
;
;
; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
-; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU1-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU1-NEXT: ret void
;
;
; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__3
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @p1() #[[ATTR11]]
+; AMDGPU1-NEXT: call void @p1() #[[ATTR12]]
; AMDGPU1-NEXT: ret void
;
;
; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
-; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU1-NEXT: ret void
;
;
@@ -1069,8 +1069,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU1: user_code.entry:
-; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU1-NEXT: ret void
; AMDGPU1: worker.exit:
@@ -1079,31 +1079,31 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__4
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
-; AMDGPU1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
+; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
+; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR10]]
+; AMDGPU1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]]
; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]]
+; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR10]]
; AMDGPU1-NEXT: ret void
;
;
; AMDGPU1: Function Attrs: noinline nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
-; AMDGPU1-SAME: () #[[ATTR6:[0-9]+]] {
+; AMDGPU1-SAME: () #[[ATTR7:[0-9]+]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU1-NEXT: ret void
;
;
; AMDGPU1: Function Attrs: convergent noinline nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
-; AMDGPU1-SAME: () #[[ATTR1]] {
+; AMDGPU1-SAME: () #[[ATTR2]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
@@ -1113,40 +1113,40 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__5
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @p1() #[[ATTR11]]
+; AMDGPU1-NEXT: call void @p1() #[[ATTR12]]
; AMDGPU1-NEXT: ret void
;
;
; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
-; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU1-NEXT: ret void
;
;
; AMDGPU1: Function Attrs: noinline nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
-; AMDGPU1-SAME: () #[[ATTR6]] {
+; AMDGPU1-SAME: () #[[ATTR7]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU1-NEXT: ret void
;
;
; AMDGPU1: Function Attrs: convergent noinline nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
-; AMDGPU1-SAME: () #[[ATTR1]] {
+; AMDGPU1-SAME: () #[[ATTR2]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
@@ -1164,8 +1164,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU1: user_code.entry:
-; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU1-NEXT: ret void
; AMDGPU1: worker.exit:
@@ -1174,60 +1174,60 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__6
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU1-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
+; AMDGPU1-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR12]]
; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; AMDGPU1-NEXT: ret void
;
;
; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__7
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @p0() #[[ATTR11]]
+; AMDGPU1-NEXT: call void @p0() #[[ATTR12]]
; AMDGPU1-NEXT: ret void
;
;
; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
-; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU1-NEXT: ret void
;
;
; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__8
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @p1() #[[ATTR11]]
+; AMDGPU1-NEXT: call void @p1() #[[ATTR12]]
; AMDGPU1-NEXT: ret void
;
;
; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
-; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU1-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU1-NEXT: ret void
;
;
@@ -1241,8 +1241,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU1: user_code.entry:
-; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU1-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU1-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU1-NEXT: ret void
; AMDGPU1: worker.exit:
@@ -1251,60 +1251,60 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__9
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
+; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; AMDGPU1-NEXT: ret void
;
;
; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__10
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @p0() #[[ATTR11]]
+; AMDGPU1-NEXT: call void @p0() #[[ATTR12]]
; AMDGPU1-NEXT: ret void
;
;
; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
-; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU1-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU1-NEXT: ret void
;
;
; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__11
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @p1() #[[ATTR11]]
+; AMDGPU1-NEXT: call void @p1() #[[ATTR12]]
; AMDGPU1-NEXT: ret void
;
;
; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
-; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU1-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU1-NEXT: ret void
;
;
@@ -1318,8 +1318,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU1: user_code.entry:
-; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU1-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU1-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU1-NEXT: ret void
; AMDGPU1: worker.exit:
@@ -1328,12 +1328,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__12
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
+; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; AMDGPU1-NEXT: ret void
@@ -1341,47 +1341,47 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__13
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @p0() #[[ATTR11]]
+; AMDGPU1-NEXT: call void @p0() #[[ATTR12]]
; AMDGPU1-NEXT: ret void
;
;
; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
-; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU1-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU1-NEXT: ret void
;
;
; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__14
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @p1() #[[ATTR11]]
+; AMDGPU1-NEXT: call void @p1() #[[ATTR12]]
; AMDGPU1-NEXT: ret void
;
;
; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
-; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU1-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU1-NEXT: ret void
;
;
@@ -1395,8 +1395,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU1: user_code.entry:
-; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU1-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU1-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU1-NEXT: ret void
; AMDGPU1: worker.exit:
@@ -1405,18 +1405,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__15
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
-; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
+; AMDGPU1-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR10]]
+; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR10]]
; AMDGPU1-NEXT: ret void
;
;
; AMDGPU1: Function Attrs: noinline nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
-; AMDGPU1-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
+; AMDGPU1-SAME: (i32 [[A:%.*]]) #[[ATTR7]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
@@ -1428,8 +1428,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1: if.end:
; AMDGPU1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
; AMDGPU1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
-; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
+; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR10]]
+; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR10]]
; AMDGPU1-NEXT: br label [[RETURN]]
; AMDGPU1: return:
; AMDGPU1-NEXT: ret void
@@ -1437,7 +1437,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU1: Function Attrs: convergent noinline nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
-; AMDGPU1-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
+; AMDGPU1-SAME: (i32 [[A:%.*]]) #[[ATTR2]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
@@ -1449,8 +1449,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1: if.end:
; AMDGPU1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
; AMDGPU1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]]
-; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]]
+; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR12]]
+; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR12]]
; AMDGPU1-NEXT: br label [[RETURN]]
; AMDGPU1: return:
; AMDGPU1-NEXT: ret void
@@ -1466,8 +1466,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU1: user_code.entry:
-; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU1-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU1-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU1-NEXT: ret void
; AMDGPU1: worker.exit:
@@ -1476,80 +1476,80 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__16
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @weak_callee_empty() #[[ATTR9]]
+; AMDGPU1-NEXT: call void @weak_callee_empty() #[[ATTR10]]
; AMDGPU1-NEXT: ret void
;
;
; AMDGPU1: Function Attrs: convergent noinline nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@weak_callee_empty
-; AMDGPU1-SAME: () #[[ATTR1]] {
+; AMDGPU1-SAME: () #[[ATTR2]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: ret void
;
;
; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__17
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @p0() #[[ATTR11]]
+; AMDGPU1-NEXT: call void @p0() #[[ATTR12]]
; AMDGPU1-NEXT: ret void
;
;
; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
-; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU1-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU1-NEXT: ret void
;
;
; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__18
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @p0() #[[ATTR11]]
+; AMDGPU1-NEXT: call void @p0() #[[ATTR12]]
; AMDGPU1-NEXT: ret void
;
;
; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
-; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU1-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU1-NEXT: ret void
;
;
; AMDGPU1: Function Attrs: noinline nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
-; AMDGPU1-SAME: () #[[ATTR6]] {
+; AMDGPU1-SAME: () #[[ATTR7]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU1-NEXT: ret void
;
;
; AMDGPU1: Function Attrs: convergent noinline nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
-; AMDGPU1-SAME: () #[[ATTR1]] {
+; AMDGPU1-SAME: () #[[ATTR2]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
@@ -1559,24 +1559,24 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__19
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @p0() #[[ATTR11]]
+; AMDGPU1-NEXT: call void @p0() #[[ATTR12]]
; AMDGPU1-NEXT: ret void
;
;
; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
-; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU1-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU1-NEXT: ret void
;
;
@@ -1590,8 +1590,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX1: user_code.entry:
-; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
-; NVPTX1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
+; NVPTX1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX1-NEXT: call void @__kmpc_target_deinit()
; NVPTX1-NEXT: ret void
; NVPTX1: worker.exit:
@@ -1600,35 +1600,35 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
-; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
+; NVPTX1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10:[0-9]+]]
+; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR11:[0-9]+]]
; NVPTX1-NEXT: ret void
;
;
; NVPTX1: Function Attrs: convergent noinline nounwind
; NVPTX1-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
-; NVPTX1-SAME: () #[[ATTR1:[0-9]+]] {
+; NVPTX1-SAME: () #[[ATTR2:[0-9]+]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
+; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
+; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR4]]
; NVPTX1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; NVPTX1-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
; NVPTX1: omp_if.then:
; NVPTX1-NEXT: store i32 0, ptr @G, align 4
-; NVPTX1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
+; NVPTX1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR4]]
; NVPTX1-NEXT: br label [[OMP_IF_END]]
; NVPTX1: omp_if.end:
-; NVPTX1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]]
+; NVPTX1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR4]]
; NVPTX1-NEXT: ret void
;
;
; NVPTX1: Function Attrs: convergent noinline nounwind
; NVPTX1-LABEL: define {{[^@]+}}@no_parallel_region_in_here
-; NVPTX1-SAME: () #[[ATTR1]] {
+; NVPTX1-SAME: () #[[ATTR2]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]])
@@ -1653,8 +1653,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX1: user_code.entry:
-; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX1-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX1-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX1-NEXT: call void @__kmpc_target_deinit()
; NVPTX1-NEXT: ret void
; NVPTX1: worker.exit:
@@ -1663,61 +1663,61 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__1
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
+; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
+; NVPTX1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]]
; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; NVPTX1-NEXT: ret void
;
;
; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__2
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
+; NVPTX1-NEXT: call void @p0() #[[ATTR12:[0-9]+]]
; NVPTX1-NEXT: ret void
;
;
; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
-; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX1-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX1-NEXT: ret void
;
;
; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__3
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @p1() #[[ATTR11]]
+; NVPTX1-NEXT: call void @p1() #[[ATTR12]]
; NVPTX1-NEXT: ret void
;
;
; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
-; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX1-NEXT: ret void
;
;
@@ -1731,8 +1731,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX1: user_code.entry:
-; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX1-NEXT: call void @__kmpc_target_deinit()
; NVPTX1-NEXT: ret void
; NVPTX1: worker.exit:
@@ -1741,31 +1741,31 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__4
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
-; NVPTX1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
+; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
+; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR10]]
+; NVPTX1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]]
; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]]
+; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR10]]
; NVPTX1-NEXT: ret void
;
;
; NVPTX1: Function Attrs: noinline nounwind
; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
-; NVPTX1-SAME: () #[[ATTR6:[0-9]+]] {
+; NVPTX1-SAME: () #[[ATTR7:[0-9]+]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX1-NEXT: ret void
;
;
; NVPTX1: Function Attrs: convergent noinline nounwind
; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
-; NVPTX1-SAME: () #[[ATTR1]] {
+; NVPTX1-SAME: () #[[ATTR2]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
@@ -1775,40 +1775,40 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__5
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @p1() #[[ATTR11]]
+; NVPTX1-NEXT: call void @p1() #[[ATTR12]]
; NVPTX1-NEXT: ret void
;
;
; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
-; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX1-NEXT: ret void
;
;
; NVPTX1: Function Attrs: noinline nounwind
; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
-; NVPTX1-SAME: () #[[ATTR6]] {
+; NVPTX1-SAME: () #[[ATTR7]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX1-NEXT: ret void
;
;
; NVPTX1: Function Attrs: convergent noinline nounwind
; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
-; NVPTX1-SAME: () #[[ATTR1]] {
+; NVPTX1-SAME: () #[[ATTR2]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
@@ -1826,8 +1826,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX1: user_code.entry:
-; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX1-NEXT: call void @__kmpc_target_deinit()
; NVPTX1-NEXT: ret void
; NVPTX1: worker.exit:
@@ -1836,60 +1836,60 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__6
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX1-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
+; NVPTX1-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR12]]
; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; NVPTX1-NEXT: ret void
;
;
; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__7
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @p0() #[[ATTR11]]
+; NVPTX1-NEXT: call void @p0() #[[ATTR12]]
; NVPTX1-NEXT: ret void
;
;
; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
-; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX1-NEXT: ret void
;
;
; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__8
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @p1() #[[ATTR11]]
+; NVPTX1-NEXT: call void @p1() #[[ATTR12]]
; NVPTX1-NEXT: ret void
;
;
; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
-; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX1-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX1-NEXT: ret void
;
;
@@ -1903,8 +1903,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX1: user_code.entry:
-; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX1-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX1-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX1-NEXT: call void @__kmpc_target_deinit()
; NVPTX1-NEXT: ret void
; NVPTX1: worker.exit:
@@ -1913,60 +1913,60 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__9
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
+; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; NVPTX1-NEXT: ret void
;
;
; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__10
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @p0() #[[ATTR11]]
+; NVPTX1-NEXT: call void @p0() #[[ATTR12]]
; NVPTX1-NEXT: ret void
;
;
; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
-; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX1-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX1-NEXT: ret void
;
;
; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__11
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @p1() #[[ATTR11]]
+; NVPTX1-NEXT: call void @p1() #[[ATTR12]]
; NVPTX1-NEXT: ret void
;
;
; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
-; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX1-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX1-NEXT: ret void
;
;
@@ -1980,8 +1980,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX1: user_code.entry:
-; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX1-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX1-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX1-NEXT: call void @__kmpc_target_deinit()
; NVPTX1-NEXT: ret void
; NVPTX1: worker.exit:
@@ -1990,12 +1990,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__12
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
+; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; NVPTX1-NEXT: ret void
@@ -2003,47 +2003,47 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__13
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @p0() #[[ATTR11]]
+; NVPTX1-NEXT: call void @p0() #[[ATTR12]]
; NVPTX1-NEXT: ret void
;
;
; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
-; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX1-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX1-NEXT: ret void
;
;
; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__14
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @p1() #[[ATTR11]]
+; NVPTX1-NEXT: call void @p1() #[[ATTR12]]
; NVPTX1-NEXT: ret void
;
;
; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
-; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX1-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX1-NEXT: ret void
;
;
@@ -2057,8 +2057,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX1: user_code.entry:
-; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX1-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX1-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX1-NEXT: call void @__kmpc_target_deinit()
; NVPTX1-NEXT: ret void
; NVPTX1: worker.exit:
@@ -2067,18 +2067,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__15
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
-; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
+; NVPTX1-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR10]]
+; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR10]]
; NVPTX1-NEXT: ret void
;
;
; NVPTX1: Function Attrs: noinline nounwind
; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
-; NVPTX1-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
+; NVPTX1-SAME: (i32 [[A:%.*]]) #[[ATTR7]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
@@ -2090,8 +2090,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1: if.end:
; NVPTX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
; NVPTX1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
-; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
+; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR10]]
+; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR10]]
; NVPTX1-NEXT: br label [[RETURN]]
; NVPTX1: return:
; NVPTX1-NEXT: ret void
@@ -2099,7 +2099,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX1: Function Attrs: convergent noinline nounwind
; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
-; NVPTX1-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
+; NVPTX1-SAME: (i32 [[A:%.*]]) #[[ATTR2]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
@@ -2111,8 +2111,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1: if.end:
; NVPTX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
; NVPTX1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]]
-; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]]
+; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR12]]
+; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR12]]
; NVPTX1-NEXT: br label [[RETURN]]
; NVPTX1: return:
; NVPTX1-NEXT: ret void
@@ -2128,8 +2128,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX1: user_code.entry:
-; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX1-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX1-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX1-NEXT: call void @__kmpc_target_deinit()
; NVPTX1-NEXT: ret void
; NVPTX1: worker.exit:
@@ -2138,80 +2138,80 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__16
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @weak_callee_empty() #[[ATTR9]]
+; NVPTX1-NEXT: call void @weak_callee_empty() #[[ATTR10]]
; NVPTX1-NEXT: ret void
;
;
; NVPTX1: Function Attrs: convergent noinline nounwind
; NVPTX1-LABEL: define {{[^@]+}}@weak_callee_empty
-; NVPTX1-SAME: () #[[ATTR1]] {
+; NVPTX1-SAME: () #[[ATTR2]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: ret void
;
;
; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__17
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @p0() #[[ATTR11]]
+; NVPTX1-NEXT: call void @p0() #[[ATTR12]]
; NVPTX1-NEXT: ret void
;
;
; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
-; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX1-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX1-NEXT: ret void
;
;
; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__18
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @p0() #[[ATTR11]]
+; NVPTX1-NEXT: call void @p0() #[[ATTR12]]
; NVPTX1-NEXT: ret void
;
;
; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
-; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX1-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX1-NEXT: ret void
;
;
; NVPTX1: Function Attrs: noinline nounwind
; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
-; NVPTX1-SAME: () #[[ATTR6]] {
+; NVPTX1-SAME: () #[[ATTR7]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX1-NEXT: ret void
;
;
; NVPTX1: Function Attrs: convergent noinline nounwind
; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
-; NVPTX1-SAME: () #[[ATTR1]] {
+; NVPTX1-SAME: () #[[ATTR2]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
@@ -2221,24 +2221,24 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__19
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @p0() #[[ATTR11]]
+; NVPTX1-NEXT: call void @p0() #[[ATTR12]]
; NVPTX1-NEXT: ret void
;
;
; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
-; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX1-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX1-NEXT: ret void
;
;
@@ -2252,8 +2252,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU2: user_code.entry:
-; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
-; AMDGPU2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
+; AMDGPU2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU2-NEXT: ret void
; AMDGPU2: worker.exit:
@@ -2262,35 +2262,35 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
-; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
+; AMDGPU2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10:[0-9]+]]
+; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR11:[0-9]+]]
; AMDGPU2-NEXT: ret void
;
;
; AMDGPU2: Function Attrs: convergent noinline nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
-; AMDGPU2-SAME: () #[[ATTR1:[0-9]+]] {
+; AMDGPU2-SAME: () #[[ATTR2:[0-9]+]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
+; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
+; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR4]]
; AMDGPU2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; AMDGPU2-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
; AMDGPU2: omp_if.then:
; AMDGPU2-NEXT: store i32 0, ptr @G, align 4
-; AMDGPU2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
+; AMDGPU2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR4]]
; AMDGPU2-NEXT: br label [[OMP_IF_END]]
; AMDGPU2: omp_if.end:
-; AMDGPU2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]]
+; AMDGPU2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR4]]
; AMDGPU2-NEXT: ret void
;
;
; AMDGPU2: Function Attrs: convergent noinline nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@no_parallel_region_in_here
-; AMDGPU2-SAME: () #[[ATTR1]] {
+; AMDGPU2-SAME: () #[[ATTR2]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]])
@@ -2315,8 +2315,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU2: user_code.entry:
-; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU2-NEXT: ret void
; AMDGPU2: worker.exit:
@@ -2325,61 +2325,61 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__1
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
+; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
+; AMDGPU2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]]
; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; AMDGPU2-NEXT: ret void
;
;
; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__2
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
+; AMDGPU2-NEXT: call void @p0() #[[ATTR12:[0-9]+]]
; AMDGPU2-NEXT: ret void
;
;
; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
-; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU2-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU2-NEXT: ret void
;
;
; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__3
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @p1() #[[ATTR11]]
+; AMDGPU2-NEXT: call void @p1() #[[ATTR12]]
; AMDGPU2-NEXT: ret void
;
;
; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
-; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU2-NEXT: ret void
;
;
@@ -2393,8 +2393,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU2: user_code.entry:
-; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU2-NEXT: ret void
; AMDGPU2: worker.exit:
@@ -2403,31 +2403,31 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__4
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
-; AMDGPU2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
+; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
+; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR10]]
+; AMDGPU2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]]
; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]]
+; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR10]]
; AMDGPU2-NEXT: ret void
;
;
; AMDGPU2: Function Attrs: noinline nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
-; AMDGPU2-SAME: () #[[ATTR6:[0-9]+]] {
+; AMDGPU2-SAME: () #[[ATTR7:[0-9]+]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU2-NEXT: ret void
;
;
; AMDGPU2: Function Attrs: convergent noinline nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
-; AMDGPU2-SAME: () #[[ATTR1]] {
+; AMDGPU2-SAME: () #[[ATTR2]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
@@ -2437,40 +2437,40 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__5
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @p1() #[[ATTR11]]
+; AMDGPU2-NEXT: call void @p1() #[[ATTR12]]
; AMDGPU2-NEXT: ret void
;
;
; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
-; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU2-NEXT: ret void
;
;
; AMDGPU2: Function Attrs: noinline nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
-; AMDGPU2-SAME: () #[[ATTR6]] {
+; AMDGPU2-SAME: () #[[ATTR7]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU2-NEXT: ret void
;
;
; AMDGPU2: Function Attrs: convergent noinline nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
-; AMDGPU2-SAME: () #[[ATTR1]] {
+; AMDGPU2-SAME: () #[[ATTR2]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
@@ -2488,8 +2488,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU2: user_code.entry:
-; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU2-NEXT: ret void
; AMDGPU2: worker.exit:
@@ -2498,60 +2498,60 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__6
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU2-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
+; AMDGPU2-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR12]]
; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; AMDGPU2-NEXT: ret void
;
;
; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__7
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @p0() #[[ATTR11]]
+; AMDGPU2-NEXT: call void @p0() #[[ATTR12]]
; AMDGPU2-NEXT: ret void
;
;
; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
-; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU2-NEXT: ret void
;
;
; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__8
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @p1() #[[ATTR11]]
+; AMDGPU2-NEXT: call void @p1() #[[ATTR12]]
; AMDGPU2-NEXT: ret void
;
;
; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
-; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU2-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU2-NEXT: ret void
;
;
@@ -2565,8 +2565,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU2: user_code.entry:
-; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU2-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU2-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU2-NEXT: ret void
; AMDGPU2: worker.exit:
@@ -2575,60 +2575,60 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__9
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
+; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; AMDGPU2-NEXT: ret void
;
;
; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__10
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @p0() #[[ATTR11]]
+; AMDGPU2-NEXT: call void @p0() #[[ATTR12]]
; AMDGPU2-NEXT: ret void
;
;
; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
-; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU2-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU2-NEXT: ret void
;
;
; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__11
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @p1() #[[ATTR11]]
+; AMDGPU2-NEXT: call void @p1() #[[ATTR12]]
; AMDGPU2-NEXT: ret void
;
;
; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
-; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU2-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU2-NEXT: ret void
;
;
@@ -2642,8 +2642,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU2: user_code.entry:
-; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU2-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU2-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU2-NEXT: ret void
; AMDGPU2: worker.exit:
@@ -2652,12 +2652,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__12
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
+; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; AMDGPU2-NEXT: ret void
@@ -2665,47 +2665,47 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__13
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @p0() #[[ATTR11]]
+; AMDGPU2-NEXT: call void @p0() #[[ATTR12]]
; AMDGPU2-NEXT: ret void
;
;
; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
-; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU2-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU2-NEXT: ret void
;
;
; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__14
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @p1() #[[ATTR11]]
+; AMDGPU2-NEXT: call void @p1() #[[ATTR12]]
; AMDGPU2-NEXT: ret void
;
;
; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
-; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU2-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU2-NEXT: ret void
;
;
@@ -2719,8 +2719,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU2: user_code.entry:
-; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU2-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU2-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU2-NEXT: ret void
; AMDGPU2: worker.exit:
@@ -2729,18 +2729,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__15
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
-; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
+; AMDGPU2-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR10]]
+; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR10]]
; AMDGPU2-NEXT: ret void
;
;
; AMDGPU2: Function Attrs: noinline nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
-; AMDGPU2-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
+; AMDGPU2-SAME: (i32 [[A:%.*]]) #[[ATTR7]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
@@ -2752,8 +2752,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2: if.end:
; AMDGPU2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
; AMDGPU2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
-; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
+; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR10]]
+; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR10]]
; AMDGPU2-NEXT: br label [[RETURN]]
; AMDGPU2: return:
; AMDGPU2-NEXT: ret void
@@ -2761,7 +2761,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU2: Function Attrs: convergent noinline nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
-; AMDGPU2-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
+; AMDGPU2-SAME: (i32 [[A:%.*]]) #[[ATTR2]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
@@ -2773,8 +2773,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2: if.end:
; AMDGPU2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
; AMDGPU2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]]
-; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]]
+; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR12]]
+; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR12]]
; AMDGPU2-NEXT: br label [[RETURN]]
; AMDGPU2: return:
; AMDGPU2-NEXT: ret void
@@ -2790,8 +2790,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU2: user_code.entry:
-; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU2-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU2-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU2-NEXT: ret void
; AMDGPU2: worker.exit:
@@ -2800,80 +2800,80 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__16
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @weak_callee_empty() #[[ATTR9]]
+; AMDGPU2-NEXT: call void @weak_callee_empty() #[[ATTR10]]
; AMDGPU2-NEXT: ret void
;
;
; AMDGPU2: Function Attrs: convergent noinline nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@weak_callee_empty
-; AMDGPU2-SAME: () #[[ATTR1]] {
+; AMDGPU2-SAME: () #[[ATTR2]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: ret void
;
;
; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__17
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @p0() #[[ATTR11]]
+; AMDGPU2-NEXT: call void @p0() #[[ATTR12]]
; AMDGPU2-NEXT: ret void
;
;
; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
-; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU2-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU2-NEXT: ret void
;
;
; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__18
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @p0() #[[ATTR11]]
+; AMDGPU2-NEXT: call void @p0() #[[ATTR12]]
; AMDGPU2-NEXT: ret void
;
;
; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
-; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU2-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU2-NEXT: ret void
;
;
; AMDGPU2: Function Attrs: noinline nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
-; AMDGPU2-SAME: () #[[ATTR6]] {
+; AMDGPU2-SAME: () #[[ATTR7]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU2-NEXT: ret void
;
;
; AMDGPU2: Function Attrs: convergent noinline nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
-; AMDGPU2-SAME: () #[[ATTR1]] {
+; AMDGPU2-SAME: () #[[ATTR2]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
@@ -2883,24 +2883,24 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__19
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @p0() #[[ATTR11]]
+; AMDGPU2-NEXT: call void @p0() #[[ATTR12]]
; AMDGPU2-NEXT: ret void
;
;
; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
-; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU2-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU2-NEXT: ret void
;
;
@@ -2914,8 +2914,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU3: user_code.entry:
-; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
-; AMDGPU3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
+; AMDGPU3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
; AMDGPU3-NEXT: ret void
; AMDGPU3: worker.exit:
@@ -2924,35 +2924,35 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
-; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
+; AMDGPU3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10:[0-9]+]]
+; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR11:[0-9]+]]
; AMDGPU3-NEXT: ret void
;
;
; AMDGPU3: Function Attrs: convergent noinline nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
-; AMDGPU3-SAME: () #[[ATTR1:[0-9]+]] {
+; AMDGPU3-SAME: () #[[ATTR2:[0-9]+]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
+; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
+; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR4]]
; AMDGPU3-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; AMDGPU3-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
; AMDGPU3: omp_if.then:
; AMDGPU3-NEXT: store i32 0, ptr @G, align 4
-; AMDGPU3-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
+; AMDGPU3-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR4]]
; AMDGPU3-NEXT: br label [[OMP_IF_END]]
; AMDGPU3: omp_if.end:
-; AMDGPU3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]]
+; AMDGPU3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR4]]
; AMDGPU3-NEXT: ret void
;
;
; AMDGPU3: Function Attrs: convergent noinline nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@no_parallel_region_in_here
-; AMDGPU3-SAME: () #[[ATTR1]] {
+; AMDGPU3-SAME: () #[[ATTR2]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]])
@@ -2977,8 +2977,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU3: user_code.entry:
-; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU3-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU3-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
; AMDGPU3-NEXT: ret void
; AMDGPU3: worker.exit:
@@ -2987,61 +2987,61 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__1
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
+; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
+; AMDGPU3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]]
; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; AMDGPU3-NEXT: ret void
;
;
; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__2
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
+; AMDGPU3-NEXT: call void @p0() #[[ATTR12:[0-9]+]]
; AMDGPU3-NEXT: ret void
;
;
; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
-; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU3-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU3-NEXT: ret void
;
;
; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__3
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @p1() #[[ATTR11]]
+; AMDGPU3-NEXT: call void @p1() #[[ATTR12]]
; AMDGPU3-NEXT: ret void
;
;
; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
-; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU3-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU3-NEXT: ret void
;
;
@@ -3055,8 +3055,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU3: user_code.entry:
-; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU3-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU3-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
; AMDGPU3-NEXT: ret void
; AMDGPU3: worker.exit:
@@ -3065,31 +3065,31 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__4
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
-; AMDGPU3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
+; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
+; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR10]]
+; AMDGPU3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]]
; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]]
+; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR10]]
; AMDGPU3-NEXT: ret void
;
;
; AMDGPU3: Function Attrs: noinline nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
-; AMDGPU3-SAME: () #[[ATTR6:[0-9]+]] {
+; AMDGPU3-SAME: () #[[ATTR7:[0-9]+]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU3-NEXT: ret void
;
;
; AMDGPU3: Function Attrs: convergent noinline nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
-; AMDGPU3-SAME: () #[[ATTR1]] {
+; AMDGPU3-SAME: () #[[ATTR2]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
@@ -3099,40 +3099,40 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__5
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @p1() #[[ATTR11]]
+; AMDGPU3-NEXT: call void @p1() #[[ATTR12]]
; AMDGPU3-NEXT: ret void
;
;
; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
-; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU3-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU3-NEXT: ret void
;
;
; AMDGPU3: Function Attrs: noinline nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
-; AMDGPU3-SAME: () #[[ATTR6]] {
+; AMDGPU3-SAME: () #[[ATTR7]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU3-NEXT: ret void
;
;
; AMDGPU3: Function Attrs: convergent noinline nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
-; AMDGPU3-SAME: () #[[ATTR1]] {
+; AMDGPU3-SAME: () #[[ATTR2]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
@@ -3150,8 +3150,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU3: user_code.entry:
-; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU3-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU3-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
; AMDGPU3-NEXT: ret void
; AMDGPU3: worker.exit:
@@ -3160,60 +3160,60 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__6
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU3-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
+; AMDGPU3-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR12]]
; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; AMDGPU3-NEXT: ret void
;
;
; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__7
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @p0() #[[ATTR11]]
+; AMDGPU3-NEXT: call void @p0() #[[ATTR12]]
; AMDGPU3-NEXT: ret void
;
;
; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
-; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU3-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU3-NEXT: ret void
;
;
; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__8
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @p1() #[[ATTR11]]
+; AMDGPU3-NEXT: call void @p1() #[[ATTR12]]
; AMDGPU3-NEXT: ret void
;
;
; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
-; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU3-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU3-NEXT: ret void
;
;
@@ -3227,8 +3227,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU3: user_code.entry:
-; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU3-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU3-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
; AMDGPU3-NEXT: ret void
; AMDGPU3: worker.exit:
@@ -3237,60 +3237,60 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__9
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
+; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; AMDGPU3-NEXT: ret void
;
;
; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__10
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @p0() #[[ATTR11]]
+; AMDGPU3-NEXT: call void @p0() #[[ATTR12]]
; AMDGPU3-NEXT: ret void
;
;
; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
-; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU3-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU3-NEXT: ret void
;
;
; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__11
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @p1() #[[ATTR11]]
+; AMDGPU3-NEXT: call void @p1() #[[ATTR12]]
; AMDGPU3-NEXT: ret void
;
;
; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
-; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU3-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU3-NEXT: ret void
;
;
@@ -3304,8 +3304,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU3: user_code.entry:
-; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU3-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU3-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
; AMDGPU3-NEXT: ret void
; AMDGPU3: worker.exit:
@@ -3314,12 +3314,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__12
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
+; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; AMDGPU3-NEXT: ret void
@@ -3327,47 +3327,47 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__13
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @p0() #[[ATTR11]]
+; AMDGPU3-NEXT: call void @p0() #[[ATTR12]]
; AMDGPU3-NEXT: ret void
;
;
; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
-; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU3-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU3-NEXT: ret void
;
;
; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__14
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @p1() #[[ATTR11]]
+; AMDGPU3-NEXT: call void @p1() #[[ATTR12]]
; AMDGPU3-NEXT: ret void
;
;
; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
-; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU3-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU3-NEXT: ret void
;
;
@@ -3381,8 +3381,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU3: user_code.entry:
-; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU3-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU3-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
; AMDGPU3-NEXT: ret void
; AMDGPU3: worker.exit:
@@ -3391,18 +3391,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__15
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
-; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
+; AMDGPU3-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR10]]
+; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR10]]
; AMDGPU3-NEXT: ret void
;
;
; AMDGPU3: Function Attrs: noinline nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
-; AMDGPU3-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
+; AMDGPU3-SAME: (i32 [[A:%.*]]) #[[ATTR7]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
@@ -3414,8 +3414,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3: if.end:
; AMDGPU3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
; AMDGPU3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
-; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
+; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR10]]
+; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR10]]
; AMDGPU3-NEXT: br label [[RETURN]]
; AMDGPU3: return:
; AMDGPU3-NEXT: ret void
@@ -3423,7 +3423,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU3: Function Attrs: convergent noinline nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
-; AMDGPU3-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
+; AMDGPU3-SAME: (i32 [[A:%.*]]) #[[ATTR2]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
@@ -3435,8 +3435,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3: if.end:
; AMDGPU3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
; AMDGPU3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]]
-; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]]
+; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR12]]
+; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR12]]
; AMDGPU3-NEXT: br label [[RETURN]]
; AMDGPU3: return:
; AMDGPU3-NEXT: ret void
@@ -3452,8 +3452,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU3: user_code.entry:
-; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU3-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU3-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
; AMDGPU3-NEXT: ret void
; AMDGPU3: worker.exit:
@@ -3462,80 +3462,80 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__16
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @weak_callee_empty() #[[ATTR9]]
+; AMDGPU3-NEXT: call void @weak_callee_empty() #[[ATTR10]]
; AMDGPU3-NEXT: ret void
;
;
; AMDGPU3: Function Attrs: convergent noinline nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@weak_callee_empty
-; AMDGPU3-SAME: () #[[ATTR1]] {
+; AMDGPU3-SAME: () #[[ATTR2]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: ret void
;
;
; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__17
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @p0() #[[ATTR11]]
+; AMDGPU3-NEXT: call void @p0() #[[ATTR12]]
; AMDGPU3-NEXT: ret void
;
;
; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
-; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU3-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU3-NEXT: ret void
;
;
; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__18
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @p0() #[[ATTR11]]
+; AMDGPU3-NEXT: call void @p0() #[[ATTR12]]
; AMDGPU3-NEXT: ret void
;
;
; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
-; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU3-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU3-NEXT: ret void
;
;
; AMDGPU3: Function Attrs: noinline nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
-; AMDGPU3-SAME: () #[[ATTR6]] {
+; AMDGPU3-SAME: () #[[ATTR7]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU3-NEXT: ret void
;
;
; AMDGPU3: Function Attrs: convergent noinline nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
-; AMDGPU3-SAME: () #[[ATTR1]] {
+; AMDGPU3-SAME: () #[[ATTR2]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
@@ -3545,24 +3545,24 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__19
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @p0() #[[ATTR11]]
+; AMDGPU3-NEXT: call void @p0() #[[ATTR12]]
; AMDGPU3-NEXT: ret void
;
;
; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
-; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU3-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU3-NEXT: ret void
;
;
@@ -3576,8 +3576,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX2: user_code.entry:
-; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
-; NVPTX2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
+; NVPTX2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX2-NEXT: call void @__kmpc_target_deinit()
; NVPTX2-NEXT: ret void
; NVPTX2: worker.exit:
@@ -3586,35 +3586,35 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
-; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
+; NVPTX2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10:[0-9]+]]
+; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR11:[0-9]+]]
; NVPTX2-NEXT: ret void
;
;
; NVPTX2: Function Attrs: convergent noinline nounwind
; NVPTX2-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
-; NVPTX2-SAME: () #[[ATTR1:[0-9]+]] {
+; NVPTX2-SAME: () #[[ATTR2:[0-9]+]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
+; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
+; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR4]]
; NVPTX2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; NVPTX2-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
; NVPTX2: omp_if.then:
; NVPTX2-NEXT: store i32 0, ptr @G, align 4
-; NVPTX2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
+; NVPTX2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR4]]
; NVPTX2-NEXT: br label [[OMP_IF_END]]
; NVPTX2: omp_if.end:
-; NVPTX2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]]
+; NVPTX2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR4]]
; NVPTX2-NEXT: ret void
;
;
; NVPTX2: Function Attrs: convergent noinline nounwind
; NVPTX2-LABEL: define {{[^@]+}}@no_parallel_region_in_here
-; NVPTX2-SAME: () #[[ATTR1]] {
+; NVPTX2-SAME: () #[[ATTR2]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]])
@@ -3639,8 +3639,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX2: user_code.entry:
-; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX2-NEXT: call void @__kmpc_target_deinit()
; NVPTX2-NEXT: ret void
; NVPTX2: worker.exit:
@@ -3649,61 +3649,61 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__1
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
+; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
+; NVPTX2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]]
; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; NVPTX2-NEXT: ret void
;
;
; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__2
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
+; NVPTX2-NEXT: call void @p0() #[[ATTR12:[0-9]+]]
; NVPTX2-NEXT: ret void
;
;
; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
-; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX2-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX2-NEXT: ret void
;
;
; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__3
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @p1() #[[ATTR11]]
+; NVPTX2-NEXT: call void @p1() #[[ATTR12]]
; NVPTX2-NEXT: ret void
;
;
; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
-; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX2-NEXT: ret void
;
;
@@ -3717,8 +3717,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX2: user_code.entry:
-; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX2-NEXT: call void @__kmpc_target_deinit()
; NVPTX2-NEXT: ret void
; NVPTX2: worker.exit:
@@ -3727,31 +3727,31 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__4
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
-; NVPTX2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
+; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
+; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR10]]
+; NVPTX2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]]
; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]]
+; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR10]]
; NVPTX2-NEXT: ret void
;
;
; NVPTX2: Function Attrs: noinline nounwind
; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
-; NVPTX2-SAME: () #[[ATTR6:[0-9]+]] {
+; NVPTX2-SAME: () #[[ATTR7:[0-9]+]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX2-NEXT: ret void
;
;
; NVPTX2: Function Attrs: convergent noinline nounwind
; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
-; NVPTX2-SAME: () #[[ATTR1]] {
+; NVPTX2-SAME: () #[[ATTR2]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
@@ -3761,40 +3761,40 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__5
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @p1() #[[ATTR11]]
+; NVPTX2-NEXT: call void @p1() #[[ATTR12]]
; NVPTX2-NEXT: ret void
;
;
; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
-; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX2-NEXT: ret void
;
;
; NVPTX2: Function Attrs: noinline nounwind
; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
-; NVPTX2-SAME: () #[[ATTR6]] {
+; NVPTX2-SAME: () #[[ATTR7]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX2-NEXT: ret void
;
;
; NVPTX2: Function Attrs: convergent noinline nounwind
; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
-; NVPTX2-SAME: () #[[ATTR1]] {
+; NVPTX2-SAME: () #[[ATTR2]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
@@ -3812,8 +3812,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX2: user_code.entry:
-; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX2-NEXT: call void @__kmpc_target_deinit()
; NVPTX2-NEXT: ret void
; NVPTX2: worker.exit:
@@ -3822,60 +3822,60 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__6
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX2-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
+; NVPTX2-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR12]]
; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; NVPTX2-NEXT: ret void
;
;
; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__7
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @p0() #[[ATTR11]]
+; NVPTX2-NEXT: call void @p0() #[[ATTR12]]
; NVPTX2-NEXT: ret void
;
;
; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
-; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX2-NEXT: ret void
;
;
; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__8
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @p1() #[[ATTR11]]
+; NVPTX2-NEXT: call void @p1() #[[ATTR12]]
; NVPTX2-NEXT: ret void
;
;
; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
-; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX2-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX2-NEXT: ret void
;
;
@@ -3889,8 +3889,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX2: user_code.entry:
-; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX2-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX2-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX2-NEXT: call void @__kmpc_target_deinit()
; NVPTX2-NEXT: ret void
; NVPTX2: worker.exit:
@@ -3899,60 +3899,60 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__9
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
+; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; NVPTX2-NEXT: ret void
;
;
; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__10
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @p0() #[[ATTR11]]
+; NVPTX2-NEXT: call void @p0() #[[ATTR12]]
; NVPTX2-NEXT: ret void
;
;
; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
-; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX2-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX2-NEXT: ret void
;
;
; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__11
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @p1() #[[ATTR11]]
+; NVPTX2-NEXT: call void @p1() #[[ATTR12]]
; NVPTX2-NEXT: ret void
;
;
; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
-; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX2-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX2-NEXT: ret void
;
;
@@ -3966,8 +3966,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX2: user_code.entry:
-; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX2-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX2-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX2-NEXT: call void @__kmpc_target_deinit()
; NVPTX2-NEXT: ret void
; NVPTX2: worker.exit:
@@ -3976,12 +3976,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__12
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
+; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; NVPTX2-NEXT: ret void
@@ -3989,47 +3989,47 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__13
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @p0() #[[ATTR11]]
+; NVPTX2-NEXT: call void @p0() #[[ATTR12]]
; NVPTX2-NEXT: ret void
;
;
; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
-; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX2-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX2-NEXT: ret void
;
;
; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__14
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @p1() #[[ATTR11]]
+; NVPTX2-NEXT: call void @p1() #[[ATTR12]]
; NVPTX2-NEXT: ret void
;
;
; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
-; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX2-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX2-NEXT: ret void
;
;
@@ -4043,8 +4043,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX2: user_code.entry:
-; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX2-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX2-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX2-NEXT: call void @__kmpc_target_deinit()
; NVPTX2-NEXT: ret void
; NVPTX2: worker.exit:
@@ -4053,18 +4053,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__15
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
-; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
+; NVPTX2-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR10]]
+; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR10]]
; NVPTX2-NEXT: ret void
;
;
; NVPTX2: Function Attrs: noinline nounwind
; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
-; NVPTX2-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
+; NVPTX2-SAME: (i32 [[A:%.*]]) #[[ATTR7]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
@@ -4076,8 +4076,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2: if.end:
; NVPTX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
; NVPTX2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
-; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
+; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR10]]
+; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR10]]
; NVPTX2-NEXT: br label [[RETURN]]
; NVPTX2: return:
; NVPTX2-NEXT: ret void
@@ -4085,7 +4085,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX2: Function Attrs: convergent noinline nounwind
; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
-; NVPTX2-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
+; NVPTX2-SAME: (i32 [[A:%.*]]) #[[ATTR2]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
@@ -4097,8 +4097,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2: if.end:
; NVPTX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
; NVPTX2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]]
-; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]]
+; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR12]]
+; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR12]]
; NVPTX2-NEXT: br label [[RETURN]]
; NVPTX2: return:
; NVPTX2-NEXT: ret void
@@ -4114,8 +4114,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX2: user_code.entry:
-; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX2-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX2-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX2-NEXT: call void @__kmpc_target_deinit()
; NVPTX2-NEXT: ret void
; NVPTX2: worker.exit:
@@ -4124,80 +4124,80 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__16
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @weak_callee_empty() #[[ATTR9]]
+; NVPTX2-NEXT: call void @weak_callee_empty() #[[ATTR10]]
; NVPTX2-NEXT: ret void
;
;
; NVPTX2: Function Attrs: convergent noinline nounwind
; NVPTX2-LABEL: define {{[^@]+}}@weak_callee_empty
-; NVPTX2-SAME: () #[[ATTR1]] {
+; NVPTX2-SAME: () #[[ATTR2]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: ret void
;
;
; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__17
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @p0() #[[ATTR11]]
+; NVPTX2-NEXT: call void @p0() #[[ATTR12]]
; NVPTX2-NEXT: ret void
;
;
; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
-; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX2-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX2-NEXT: ret void
;
;
; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__18
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @p0() #[[ATTR11]]
+; NVPTX2-NEXT: call void @p0() #[[ATTR12]]
; NVPTX2-NEXT: ret void
;
;
; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
-; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX2-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX2-NEXT: ret void
;
;
; NVPTX2: Function Attrs: noinline nounwind
; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
-; NVPTX2-SAME: () #[[ATTR6]] {
+; NVPTX2-SAME: () #[[ATTR7]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX2-NEXT: ret void
;
;
; NVPTX2: Function Attrs: convergent noinline nounwind
; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
-; NVPTX2-SAME: () #[[ATTR1]] {
+; NVPTX2-SAME: () #[[ATTR2]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
@@ -4207,24 +4207,24 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__19
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @p0() #[[ATTR11]]
+; NVPTX2-NEXT: call void @p0() #[[ATTR12]]
; NVPTX2-NEXT: ret void
;
;
; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
-; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX2-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX2-NEXT: ret void
;
;
@@ -4238,8 +4238,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX3: user_code.entry:
-; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
-; NVPTX3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
+; NVPTX3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX3-NEXT: call void @__kmpc_target_deinit()
; NVPTX3-NEXT: ret void
; NVPTX3: worker.exit:
@@ -4248,35 +4248,35 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
-; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
+; NVPTX3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10:[0-9]+]]
+; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR11:[0-9]+]]
; NVPTX3-NEXT: ret void
;
;
; NVPTX3: Function Attrs: convergent noinline nounwind
; NVPTX3-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
-; NVPTX3-SAME: () #[[ATTR1:[0-9]+]] {
+; NVPTX3-SAME: () #[[ATTR2:[0-9]+]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
+; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
+; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR4]]
; NVPTX3-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; NVPTX3-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
; NVPTX3: omp_if.then:
; NVPTX3-NEXT: store i32 0, ptr @G, align 4
-; NVPTX3-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
+; NVPTX3-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR4]]
; NVPTX3-NEXT: br label [[OMP_IF_END]]
; NVPTX3: omp_if.end:
-; NVPTX3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]]
+; NVPTX3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR4]]
; NVPTX3-NEXT: ret void
;
;
; NVPTX3: Function Attrs: convergent noinline nounwind
; NVPTX3-LABEL: define {{[^@]+}}@no_parallel_region_in_here
-; NVPTX3-SAME: () #[[ATTR1]] {
+; NVPTX3-SAME: () #[[ATTR2]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]])
@@ -4301,8 +4301,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX3: user_code.entry:
-; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX3-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX3-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX3-NEXT: call void @__kmpc_target_deinit()
; NVPTX3-NEXT: ret void
; NVPTX3: worker.exit:
@@ -4311,61 +4311,61 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__1
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
+; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
+; NVPTX3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]]
; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; NVPTX3-NEXT: ret void
;
;
; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__2
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
+; NVPTX3-NEXT: call void @p0() #[[ATTR12:[0-9]+]]
; NVPTX3-NEXT: ret void
;
;
; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
-; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX3-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX3-NEXT: ret void
;
;
; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__3
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @p1() #[[ATTR11]]
+; NVPTX3-NEXT: call void @p1() #[[ATTR12]]
; NVPTX3-NEXT: ret void
;
;
; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
-; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX3-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX3-NEXT: ret void
;
;
@@ -4379,8 +4379,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX3: user_code.entry:
-; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX3-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX3-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX3-NEXT: call void @__kmpc_target_deinit()
; NVPTX3-NEXT: ret void
; NVPTX3: worker.exit:
@@ -4389,31 +4389,31 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__4
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
-; NVPTX3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
+; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
+; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR10]]
+; NVPTX3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]]
; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]]
+; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR10]]
; NVPTX3-NEXT: ret void
;
;
; NVPTX3: Function Attrs: noinline nounwind
; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
-; NVPTX3-SAME: () #[[ATTR6:[0-9]+]] {
+; NVPTX3-SAME: () #[[ATTR7:[0-9]+]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX3-NEXT: ret void
;
;
; NVPTX3: Function Attrs: convergent noinline nounwind
; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
-; NVPTX3-SAME: () #[[ATTR1]] {
+; NVPTX3-SAME: () #[[ATTR2]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
@@ -4423,40 +4423,40 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__5
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @p1() #[[ATTR11]]
+; NVPTX3-NEXT: call void @p1() #[[ATTR12]]
; NVPTX3-NEXT: ret void
;
;
; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
-; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX3-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX3-NEXT: ret void
;
;
; NVPTX3: Function Attrs: noinline nounwind
; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
-; NVPTX3-SAME: () #[[ATTR6]] {
+; NVPTX3-SAME: () #[[ATTR7]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX3-NEXT: ret void
;
;
; NVPTX3: Function Attrs: convergent noinline nounwind
; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
-; NVPTX3-SAME: () #[[ATTR1]] {
+; NVPTX3-SAME: () #[[ATTR2]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
@@ -4474,8 +4474,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX3: user_code.entry:
-; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX3-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX3-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX3-NEXT: call void @__kmpc_target_deinit()
; NVPTX3-NEXT: ret void
; NVPTX3: worker.exit:
@@ -4484,60 +4484,60 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__6
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX3-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
+; NVPTX3-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR12]]
; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; NVPTX3-NEXT: ret void
;
;
; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__7
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @p0() #[[ATTR11]]
+; NVPTX3-NEXT: call void @p0() #[[ATTR12]]
; NVPTX3-NEXT: ret void
;
;
; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
-; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX3-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX3-NEXT: ret void
;
;
; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__8
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @p1() #[[ATTR11]]
+; NVPTX3-NEXT: call void @p1() #[[ATTR12]]
; NVPTX3-NEXT: ret void
;
;
; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
-; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX3-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX3-NEXT: ret void
;
;
@@ -4551,8 +4551,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX3: user_code.entry:
-; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX3-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX3-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX3-NEXT: call void @__kmpc_target_deinit()
; NVPTX3-NEXT: ret void
; NVPTX3: worker.exit:
@@ -4561,60 +4561,60 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__9
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
+; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; NVPTX3-NEXT: ret void
;
;
; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__10
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @p0() #[[ATTR11]]
+; NVPTX3-NEXT: call void @p0() #[[ATTR12]]
; NVPTX3-NEXT: ret void
;
;
; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
-; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX3-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX3-NEXT: ret void
;
;
; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__11
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @p1() #[[ATTR11]]
+; NVPTX3-NEXT: call void @p1() #[[ATTR12]]
; NVPTX3-NEXT: ret void
;
;
; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
-; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX3-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX3-NEXT: ret void
;
;
@@ -4628,8 +4628,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX3: user_code.entry:
-; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX3-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX3-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX3-NEXT: call void @__kmpc_target_deinit()
; NVPTX3-NEXT: ret void
; NVPTX3: worker.exit:
@@ -4638,12 +4638,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__12
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
+; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
; NVPTX3-NEXT: ret void
@@ -4651,47 +4651,47 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__13
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @p0() #[[ATTR11]]
+; NVPTX3-NEXT: call void @p0() #[[ATTR12]]
; NVPTX3-NEXT: ret void
;
;
; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
-; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX3-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX3-NEXT: ret void
;
;
; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__14
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @p1() #[[ATTR11]]
+; NVPTX3-NEXT: call void @p1() #[[ATTR12]]
; NVPTX3-NEXT: ret void
;
;
; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
-; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX3-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX3-NEXT: ret void
;
;
@@ -4705,8 +4705,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX3: user_code.entry:
-; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX3-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX3-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX3-NEXT: call void @__kmpc_target_deinit()
; NVPTX3-NEXT: ret void
; NVPTX3: worker.exit:
@@ -4715,18 +4715,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__15
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
-; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
+; NVPTX3-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR10]]
+; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR10]]
; NVPTX3-NEXT: ret void
;
;
; NVPTX3: Function Attrs: noinline nounwind
; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
-; NVPTX3-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
+; NVPTX3-SAME: (i32 [[A:%.*]]) #[[ATTR7]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
@@ -4738,8 +4738,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3: if.end:
; NVPTX3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
; NVPTX3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
-; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
+; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR10]]
+; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR10]]
; NVPTX3-NEXT: br label [[RETURN]]
; NVPTX3: return:
; NVPTX3-NEXT: ret void
@@ -4747,7 +4747,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX3: Function Attrs: convergent noinline nounwind
; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
-; NVPTX3-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
+; NVPTX3-SAME: (i32 [[A:%.*]]) #[[ATTR2]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
@@ -4759,8 +4759,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3: if.end:
; NVPTX3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
; NVPTX3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]]
-; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]]
+; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR12]]
+; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR12]]
; NVPTX3-NEXT: br label [[RETURN]]
; NVPTX3: return:
; NVPTX3-NEXT: ret void
@@ -4776,8 +4776,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX3: user_code.entry:
-; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX3-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX3-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX3-NEXT: call void @__kmpc_target_deinit()
; NVPTX3-NEXT: ret void
; NVPTX3: worker.exit:
@@ -4786,80 +4786,80 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__16
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @weak_callee_empty() #[[ATTR9]]
+; NVPTX3-NEXT: call void @weak_callee_empty() #[[ATTR10]]
; NVPTX3-NEXT: ret void
;
;
; NVPTX3: Function Attrs: convergent noinline nounwind
; NVPTX3-LABEL: define {{[^@]+}}@weak_callee_empty
-; NVPTX3-SAME: () #[[ATTR1]] {
+; NVPTX3-SAME: () #[[ATTR2]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: ret void
;
;
; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__17
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @p0() #[[ATTR11]]
+; NVPTX3-NEXT: call void @p0() #[[ATTR12]]
; NVPTX3-NEXT: ret void
;
;
; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
-; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX3-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX3-NEXT: ret void
;
;
; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__18
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @p0() #[[ATTR11]]
+; NVPTX3-NEXT: call void @p0() #[[ATTR12]]
; NVPTX3-NEXT: ret void
;
;
; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
-; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX3-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX3-NEXT: ret void
;
;
; NVPTX3: Function Attrs: noinline nounwind
; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
-; NVPTX3-SAME: () #[[ATTR6]] {
+; NVPTX3-SAME: () #[[ATTR7]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4]]
; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX3-NEXT: ret void
;
;
; NVPTX3: Function Attrs: convergent noinline nounwind
; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
-; NVPTX3-SAME: () #[[ATTR1]] {
+; NVPTX3-SAME: () #[[ATTR2]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
@@ -4869,104 +4869,110 @@ attributes #9 = { convergent nounwind readonly willreturn }
;
; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__19
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @p0() #[[ATTR11]]
+; NVPTX3-NEXT: call void @p0() #[[ATTR12]]
; NVPTX3-NEXT: ret void
;
;
; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
-; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX3-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX3-NEXT: ret void
;
;.
-; AMDGPU1: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU1: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU1: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU1: attributes #[[ATTR3]] = { nounwind }
-; AMDGPU1: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU1: attributes #[[ATTR5:[0-9]+]] = { alwaysinline }
-; AMDGPU1: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU1: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU1: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU1: attributes #[[ATTR9]] = { convergent nounwind }
-; AMDGPU1: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" }
-; AMDGPU1: attributes #[[ATTR11]] = { convergent }
+; AMDGPU1: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "nvvm.kernel" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU1: attributes #[[ATTR1]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU1: attributes #[[ATTR2]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU1: attributes #[[ATTR3:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU1: attributes #[[ATTR4]] = { nounwind }
+; AMDGPU1: attributes #[[ATTR5:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU1: attributes #[[ATTR6:[0-9]+]] = { alwaysinline }
+; AMDGPU1: attributes #[[ATTR7]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU1: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU1: attributes #[[ATTR9:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU1: attributes #[[ATTR10]] = { convergent nounwind }
+; AMDGPU1: attributes #[[ATTR11]] = { convergent "llvm.assume"="omp_no_openmp" }
+; AMDGPU1: attributes #[[ATTR12]] = { convergent }
;.
-; NVPTX1: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX1: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX1: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX1: attributes #[[ATTR3]] = { nounwind }
-; NVPTX1: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX1: attributes #[[ATTR5:[0-9]+]] = { alwaysinline }
-; NVPTX1: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX1: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX1: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX1: attributes #[[ATTR9]] = { convergent nounwind }
-; NVPTX1: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" }
-; NVPTX1: attributes #[[ATTR11]] = { convergent }
+; NVPTX1: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "nvvm.kernel" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX1: attributes #[[ATTR1]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX1: attributes #[[ATTR2]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX1: attributes #[[ATTR3:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX1: attributes #[[ATTR4]] = { nounwind }
+; NVPTX1: attributes #[[ATTR5:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX1: attributes #[[ATTR6:[0-9]+]] = { alwaysinline }
+; NVPTX1: attributes #[[ATTR7]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX1: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX1: attributes #[[ATTR9:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX1: attributes #[[ATTR10]] = { convergent nounwind }
+; NVPTX1: attributes #[[ATTR11]] = { convergent "llvm.assume"="omp_no_openmp" }
+; NVPTX1: attributes #[[ATTR12]] = { convergent }
;.
-; AMDGPU2: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU2: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU2: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU2: attributes #[[ATTR3]] = { nounwind }
-; AMDGPU2: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU2: attributes #[[ATTR5:[0-9]+]] = { alwaysinline }
-; AMDGPU2: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU2: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU2: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU2: attributes #[[ATTR9]] = { convergent nounwind }
-; AMDGPU2: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" }
-; AMDGPU2: attributes #[[ATTR11]] = { convergent }
+; AMDGPU2: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "nvvm.kernel" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU2: attributes #[[ATTR1]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU2: attributes #[[ATTR2]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU2: attributes #[[ATTR3:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU2: attributes #[[ATTR4]] = { nounwind }
+; AMDGPU2: attributes #[[ATTR5:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU2: attributes #[[ATTR6:[0-9]+]] = { alwaysinline }
+; AMDGPU2: attributes #[[ATTR7]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU2: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU2: attributes #[[ATTR9:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU2: attributes #[[ATTR10]] = { convergent nounwind }
+; AMDGPU2: attributes #[[ATTR11]] = { convergent "llvm.assume"="omp_no_openmp" }
+; AMDGPU2: attributes #[[ATTR12]] = { convergent }
;.
-; AMDGPU3: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU3: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU3: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU3: attributes #[[ATTR3]] = { nounwind }
-; AMDGPU3: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU3: attributes #[[ATTR5:[0-9]+]] = { alwaysinline }
-; AMDGPU3: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU3: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU3: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU3: attributes #[[ATTR9]] = { convergent nounwind }
-; AMDGPU3: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" }
-; AMDGPU3: attributes #[[ATTR11]] = { convergent }
+; AMDGPU3: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "nvvm.kernel" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU3: attributes #[[ATTR1]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU3: attributes #[[ATTR2]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU3: attributes #[[ATTR3:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU3: attributes #[[ATTR4]] = { nounwind }
+; AMDGPU3: attributes #[[ATTR5:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU3: attributes #[[ATTR6:[0-9]+]] = { alwaysinline }
+; AMDGPU3: attributes #[[ATTR7]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU3: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU3: attributes #[[ATTR9:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; AMDGPU3: attributes #[[ATTR10]] = { convergent nounwind }
+; AMDGPU3: attributes #[[ATTR11]] = { convergent "llvm.assume"="omp_no_openmp" }
+; AMDGPU3: attributes #[[ATTR12]] = { convergent }
;.
-; NVPTX2: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX2: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX2: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX2: attributes #[[ATTR3]] = { nounwind }
-; NVPTX2: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX2: attributes #[[ATTR5:[0-9]+]] = { alwaysinline }
-; NVPTX2: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX2: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX2: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX2: attributes #[[ATTR9]] = { convergent nounwind }
-; NVPTX2: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" }
-; NVPTX2: attributes #[[ATTR11]] = { convergent }
+; NVPTX2: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "nvvm.kernel" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX2: attributes #[[ATTR1]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX2: attributes #[[ATTR2]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX2: attributes #[[ATTR3:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX2: attributes #[[ATTR4]] = { nounwind }
+; NVPTX2: attributes #[[ATTR5:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX2: attributes #[[ATTR6:[0-9]+]] = { alwaysinline }
+; NVPTX2: attributes #[[ATTR7]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX2: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX2: attributes #[[ATTR9:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX2: attributes #[[ATTR10]] = { convergent nounwind }
+; NVPTX2: attributes #[[ATTR11]] = { convergent "llvm.assume"="omp_no_openmp" }
+; NVPTX2: attributes #[[ATTR12]] = { convergent }
;.
-; NVPTX3: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX3: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX3: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX3: attributes #[[ATTR3]] = { nounwind }
-; NVPTX3: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX3: attributes #[[ATTR5:[0-9]+]] = { alwaysinline }
-; NVPTX3: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX3: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX3: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX3: attributes #[[ATTR9]] = { convergent nounwind }
-; NVPTX3: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" }
-; NVPTX3: attributes #[[ATTR11]] = { convergent }
+; NVPTX3: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "nvvm.kernel" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX3: attributes #[[ATTR1]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX3: attributes #[[ATTR2]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX3: attributes #[[ATTR3:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX3: attributes #[[ATTR4]] = { nounwind }
+; NVPTX3: attributes #[[ATTR5:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX3: attributes #[[ATTR6:[0-9]+]] = { alwaysinline }
+; NVPTX3: attributes #[[ATTR7]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX3: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX3: attributes #[[ATTR9:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+; NVPTX3: attributes #[[ATTR10]] = { convergent nounwind }
+; NVPTX3: attributes #[[ATTR11]] = { convergent "llvm.assume"="omp_no_openmp" }
+; NVPTX3: attributes #[[ATTR12]] = { convergent }
;.
; AMDGPU1: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
; AMDGPU1: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
@@ -4976,17 +4982,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
; AMDGPU1: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
; AMDGPU1: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
-; AMDGPU1: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1}
-; AMDGPU1: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1}
-; AMDGPU1: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1}
-; AMDGPU1: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1}
-; AMDGPU1: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1}
-; AMDGPU1: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1}
-; AMDGPU1: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1}
-; AMDGPU1: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1}
-; AMDGPU1: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; AMDGPU1: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; AMDGPU1: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; AMDGPU1: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; AMDGPU1: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; AMDGPU1: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
;.
; NVPTX1: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
; NVPTX1: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
@@ -4996,17 +4994,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
; NVPTX1: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
; NVPTX1: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
-; NVPTX1: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1}
-; NVPTX1: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1}
-; NVPTX1: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1}
-; NVPTX1: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1}
-; NVPTX1: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1}
-; NVPTX1: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1}
-; NVPTX1: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1}
-; NVPTX1: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1}
-; NVPTX1: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; NVPTX1: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; NVPTX1: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; NVPTX1: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; NVPTX1: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; NVPTX1: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
;.
; AMDGPU2: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
; AMDGPU2: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
@@ -5016,17 +5006,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
; AMDGPU2: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
; AMDGPU2: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
-; AMDGPU2: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1}
-; AMDGPU2: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1}
-; AMDGPU2: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1}
-; AMDGPU2: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1}
-; AMDGPU2: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1}
-; AMDGPU2: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1}
-; AMDGPU2: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1}
-; AMDGPU2: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1}
-; AMDGPU2: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; AMDGPU2: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; AMDGPU2: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; AMDGPU2: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; AMDGPU2: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; AMDGPU2: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
;.
; AMDGPU3: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
; AMDGPU3: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
@@ -5036,17 +5018,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
; AMDGPU3: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
; AMDGPU3: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
-; AMDGPU3: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1}
-; AMDGPU3: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1}
-; AMDGPU3: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1}
-; AMDGPU3: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1}
-; AMDGPU3: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1}
-; AMDGPU3: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1}
-; AMDGPU3: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1}
-; AMDGPU3: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1}
-; AMDGPU3: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; AMDGPU3: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; AMDGPU3: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; AMDGPU3: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; AMDGPU3: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; AMDGPU3: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
;.
; NVPTX2: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
; NVPTX2: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
@@ -5056,17 +5030,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
; NVPTX2: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
; NVPTX2: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
-; NVPTX2: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1}
-; NVPTX2: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1}
-; NVPTX2: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1}
-; NVPTX2: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1}
-; NVPTX2: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1}
-; NVPTX2: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1}
-; NVPTX2: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1}
-; NVPTX2: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1}
-; NVPTX2: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; NVPTX2: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; NVPTX2: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; NVPTX2: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; NVPTX2: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; NVPTX2: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
;.
; NVPTX3: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
; NVPTX3: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
@@ -5076,15 +5042,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
; NVPTX3: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
; NVPTX3: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
-; NVPTX3: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1}
-; NVPTX3: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1}
-; NVPTX3: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1}
-; NVPTX3: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1}
-; NVPTX3: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1}
-; NVPTX3: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1}
-; NVPTX3: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1}
-; NVPTX3: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1}
-; NVPTX3: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; NVPTX3: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; NVPTX3: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; NVPTX3: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; NVPTX3: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; NVPTX3: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
;.
diff --git a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll
index 6102201ad4bac7..4840dc1a14707e 100644
--- a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll
+++ b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll
@@ -210,14 +210,11 @@ attributes #0 = { "omp_target_thread_limit"="666" "omp_target_num_teams"="777"}
!3 = !{ptr @kernel1, !"kernel", i32 1}
!4 = !{ptr @kernel2, !"kernel", i32 1}
;.
-; CHECK: attributes #[[ATTR0]] = { "kernel" "omp_target_num_teams"="777" "omp_target_thread_limit"="666" }
+; CHECK: attributes #[[ATTR0]] = { "kernel" "nvvm.kernel" "omp_target_num_teams"="777" "omp_target_thread_limit"="666" }
; CHECK: attributes #[[ATTR1]] = { nounwind }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { alwaysinline }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nounwind }
;.
; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK: [[META2:![0-9]+]] = !{ptr @kernel0, !"kernel", i32 1}
-; CHECK: [[META3:![0-9]+]] = !{ptr @kernel1, !"kernel", i32 1}
-; CHECK: [[META4:![0-9]+]] = !{ptr @kernel2, !"kernel", i32 1}
;.
diff --git a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll
index 0cf6e7488b4dd4..e17ed5f9fa6857 100644
--- a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll
+++ b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll
@@ -118,12 +118,9 @@ attributes #0 = { optnone noinline "kernel" "omp_target_thread_limit"="666" "omp
!4 = !{ptr @kernel2, !"kernel", i32 1}
;
;.
-; CHECK: attributes #[[ATTR0]] = { noinline optnone "kernel" "omp_target_num_teams"="777" "omp_target_thread_limit"="666" }
+; CHECK: attributes #[[ATTR0]] = { noinline optnone "kernel" "nvvm.kernel" "omp_target_num_teams"="777" "omp_target_thread_limit"="666" }
; CHECK: attributes #[[ATTR1]] = { nounwind }
;.
; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK: [[META2:![0-9]+]] = !{ptr @kernel0, !"kernel", i32 1}
-; CHECK: [[META3:![0-9]+]] = !{ptr @kernel1, !"kernel", i32 1}
-; CHECK: [[META4:![0-9]+]] = !{ptr @kernel2, !"kernel", i32 1}
;.
diff --git a/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll b/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll
index 310ac0a8296c33..3b248dfdc2d592 100644
--- a/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll
+++ b/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll
@@ -216,15 +216,11 @@ declare void @bar()
!4 = !{ptr @non_spmd, !"kernel", i32 1}
!5 = !{ptr @will_not_be_spmd, !"kernel", i32 1}
;.
-; CHECK: attributes #[[ATTR0]] = { "kernel" }
+; CHECK: attributes #[[ATTR0]] = { "kernel" "nvvm.kernel" }
; CHECK: attributes #[[ATTR1:[0-9]+]] = { "llvm.assume"="ompx_spmd_amenable" }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { alwaysinline }
; CHECK: attributes #[[ATTR3]] = { nounwind }
;.
; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK: [[META2:![0-9]+]] = !{ptr @is_spmd, !"kernel", i32 1}
-; CHECK: [[META3:![0-9]+]] = !{ptr @will_be_spmd, !"kernel", i32 1}
-; CHECK: [[META4:![0-9]+]] = !{ptr @non_spmd, !"kernel", i32 1}
-; CHECK: [[META5:![0-9]+]] = !{ptr @will_not_be_spmd, !"kernel", i32 1}
;.
diff --git a/llvm/test/Transforms/OpenMP/nested_parallelism.ll b/llvm/test/Transforms/OpenMP/nested_parallelism.ll
index 5c4386b24a3d5a..97fc5dc0bf33ac 100644
--- a/llvm/test/Transforms/OpenMP/nested_parallelism.ll
+++ b/llvm/test/Transforms/OpenMP/nested_parallelism.ll
@@ -326,7 +326,7 @@ declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #11
!4 = !{i32 7, !"openmp", i32 50}
!5 = !{i32 7, !"openmp-device", i32 50}
;.
-; CHECK: attributes #[[ATTR0:[0-9]+]] = { "kernel" }
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { "kernel" "nvvm.kernel" }
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nosync nounwind allocsize(0) }
; CHECK: attributes #[[ATTR2]] = { nounwind }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { alwaysinline }
@@ -336,8 +336,6 @@ declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #11
;.
; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 66306, i32 776160, !"main", i32 13, i32 0, i32 0}
; CHECK: [[META1:![0-9]+]] = !{i32 0, i32 66306, i32 776160, !"main", i32 16, i32 0, i32 1}
-; CHECK: [[META2:![0-9]+]] = !{ptr @__omp_offloading_10302_bd7e0_main_l13, !"kernel", i32 1}
-; CHECK: [[META3:![0-9]+]] = !{ptr @__omp_offloading_10302_bd7e0_main_l16, !"kernel", i32 1}
-; CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; CHECK: [[META5:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; CHECK: [[META2:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
;.
diff --git a/llvm/test/Transforms/OpenMP/parallel_level_fold.ll b/llvm/test/Transforms/OpenMP/parallel_level_fold.ll
index fd6e7683af8e3e..959cb20c172206 100644
--- a/llvm/test/Transforms/OpenMP/parallel_level_fold.ll
+++ b/llvm/test/Transforms/OpenMP/parallel_level_fold.ll
@@ -144,12 +144,9 @@ declare void @__kmpc_target_deinit() #1
!3 = !{ptr @spmd, !"kernel", i32 1}
!4 = !{ptr @parallel, !"kernel", i32 1}
;.
-; CHECK: attributes #[[ATTR0]] = { "kernel" }
+; CHECK: attributes #[[ATTR0]] = { "kernel" "nvvm.kernel" }
; CHECK: attributes #[[ATTR1]] = { alwaysinline }
;.
; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK: [[META2:![0-9]+]] = !{ptr @none_spmd, !"kernel", i32 1}
-; CHECK: [[META3:![0-9]+]] = !{ptr @spmd, !"kernel", i32 1}
-; CHECK: [[META4:![0-9]+]] = !{ptr @parallel, !"kernel", i32 1}
;.
diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll
index 31e3ef2b9079f6..47795b20844787 100644
--- a/llvm/test/Transforms/OpenMP/remove_globalization.ll
+++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll
@@ -98,14 +98,14 @@ define internal void @bar() {
; CHECK-SAME: () #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 4
-; CHECK-NEXT: call void @share(ptr nofree [[DOTH2S]]) #[[ATTR5:[0-9]+]], !dbg [[DBG8:![0-9]+]]
+; CHECK-NEXT: call void @share(ptr nofree [[DOTH2S]]) #[[ATTR5:[0-9]+]], !dbg [[DBG7:![0-9]+]]
; CHECK-NEXT: ret void
;
; CHECK-DISABLED-LABEL: define {{[^@]+}}@bar
; CHECK-DISABLED-SAME: () #[[ATTR1]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 4
-; CHECK-DISABLED-NEXT: call void @share(ptr nofree [[DOTH2S]]) #[[ATTR5:[0-9]+]], !dbg [[DBG8:![0-9]+]]
+; CHECK-DISABLED-NEXT: call void @share(ptr nofree [[DOTH2S]]) #[[ATTR5:[0-9]+]], !dbg [[DBG7:![0-9]+]]
; CHECK-DISABLED-NEXT: ret void
;
entry:
@@ -146,7 +146,7 @@ define void @unused() {
;
; CHECK-DISABLED-LABEL: define {{[^@]+}}@unused() {
; CHECK-DISABLED-NEXT: entry:
-; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) #[[ATTR6:[0-9]+]], !dbg [[DBG11:![0-9]+]]
+; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) #[[ATTR6:[0-9]+]], !dbg [[DBG10:![0-9]+]]
; CHECK-DISABLED-NEXT: call void @__kmpc_free_shared(ptr [[TMP0]], i64 4) #[[ATTR6]]
; CHECK-DISABLED-NEXT: ret void
;
@@ -254,14 +254,14 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
!15 = !DILocation(line: 8, column: 2, scope: !9)
!16 = !DILocation(line: 10, column: 2, scope: !9)
;.
-; CHECK: attributes #[[ATTR0]] = { "kernel" }
+; CHECK: attributes #[[ATTR0]] = { "kernel" "nvvm.kernel" }
; CHECK: attributes #[[ATTR1]] = { nosync nounwind }
; CHECK: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind memory(write) }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { nosync nounwind allocsize(0) }
; CHECK: attributes #[[ATTR4]] = { "llvm.assume"="omp_no_openmp" }
; CHECK: attributes #[[ATTR5]] = { nosync nounwind memory(write) }
;.
-; CHECK-DISABLED: attributes #[[ATTR0]] = { "kernel" }
+; CHECK-DISABLED: attributes #[[ATTR0]] = { "kernel" "nvvm.kernel" }
; CHECK-DISABLED: attributes #[[ATTR1]] = { nosync nounwind }
; CHECK-DISABLED: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind memory(write) }
; CHECK-DISABLED: attributes #[[ATTR3:[0-9]+]] = { nosync nounwind allocsize(0) }
@@ -276,10 +276,9 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
; CHECK: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; CHECK: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CHECK: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK: [[META7:![0-9]+]] = !{ptr @kernel, !"kernel", i32 1}
-; CHECK: [[DBG8]] = !DILocation(line: 4, column: 2, scope: [[META9:![0-9]+]])
-; CHECK: [[META9]] = distinct !DISubprogram(name: "bar", scope: [[META1]], file: [[META1]], line: 1, type: [[META10:![0-9]+]], scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META2]])
-; CHECK: [[META10]] = !DISubroutineType(types: [[META2]])
+; CHECK: [[DBG7]] = !DILocation(line: 4, column: 2, scope: [[META8:![0-9]+]])
+; CHECK: [[META8]] = distinct !DISubprogram(name: "bar", scope: [[META1]], file: [[META1]], line: 1, type: [[META9:![0-9]+]], scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META2]])
+; CHECK: [[META9]] = !DISubroutineType(types: [[META2]])
;.
; CHECK-DISABLED: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: [[META1:![0-9]+]], producer: "{{.*}}clang version {{.*}}", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: [[META2:![0-9]+]], splitDebugInlining: false, nameTableKind: None)
; CHECK-DISABLED: [[META1]] = !DIFile(filename: "remove_globalization.c", directory: {{.*}})
@@ -288,11 +287,10 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
; CHECK-DISABLED: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; CHECK-DISABLED: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CHECK-DISABLED: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK-DISABLED: [[META7:![0-9]+]] = !{ptr @kernel, !"kernel", i32 1}
-; CHECK-DISABLED: [[DBG8]] = !DILocation(line: 4, column: 2, scope: [[META9:![0-9]+]])
-; CHECK-DISABLED: [[META9]] = distinct !DISubprogram(name: "bar", scope: [[META1]], file: [[META1]], line: 1, type: [[META10:![0-9]+]], scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META2]])
-; CHECK-DISABLED: [[META10]] = !DISubroutineType(types: [[META2]])
-; CHECK-DISABLED: [[DBG11]] = !DILocation(line: 6, column: 2, scope: [[META9]])
+; CHECK-DISABLED: [[DBG7]] = !DILocation(line: 4, column: 2, scope: [[META8:![0-9]+]])
+; CHECK-DISABLED: [[META8]] = distinct !DISubprogram(name: "bar", scope: [[META1]], file: [[META1]], line: 1, type: [[META9:![0-9]+]], scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META2]])
+; CHECK-DISABLED: [[META9]] = !DISubroutineType(types: [[META2]])
+; CHECK-DISABLED: [[DBG10]] = !DILocation(line: 6, column: 2, scope: [[META8]])
;.
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK-REMARKS: {{.*}}
diff --git a/llvm/test/Transforms/OpenMP/replace_globalization.ll b/llvm/test/Transforms/OpenMP/replace_globalization.ll
index 6e4fb9e57388ba..73a7e92d7e62c7 100644
--- a/llvm/test/Transforms/OpenMP/replace_globalization.ll
+++ b/llvm/test/Transforms/OpenMP/replace_globalization.ll
@@ -177,7 +177,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
; CHECK-NEXT: [[C0:%.*]] = icmp eq i32 [[C]], -1
; CHECK-NEXT: br i1 [[C0]], label [[MASTER3:%.*]], label [[EXIT:%.*]]
; CHECK: master3:
-; CHECK-NEXT: [[Z:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 24) #[[ATTR6]], !dbg [[DBG10:![0-9]+]]
+; CHECK-NEXT: [[Z:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 24) #[[ATTR6]], !dbg [[DBG7:![0-9]+]]
; CHECK-NEXT: call void @use.internalized(ptr nofree [[Z]]) #[[ATTR7]]
; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[Z]], i64 24) #[[ATTR8]]
; CHECK-NEXT: br label [[EXIT]]
@@ -214,7 +214,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
; CHECK-NEXT: ret i32 0
;
;.
-; CHECK: attributes #[[ATTR0]] = { "kernel" }
+; CHECK: attributes #[[ATTR0]] = { "kernel" "nvvm.kernel" }
; CHECK: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind memory(write) }
; CHECK: attributes #[[ATTR2]] = { nosync nounwind allocsize(0) memory(read) }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { nosync nounwind }
@@ -231,12 +231,9 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
; CHECK: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; CHECK: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CHECK: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK: [[META7:![0-9]+]] = !{ptr @foo, !"kernel", i32 1}
-; CHECK: [[META8:![0-9]+]] = !{ptr @bar, !"kernel", i32 1}
-; CHECK: [[META9:![0-9]+]] = !{ptr @baz_spmd, !"kernel", i32 1}
-; CHECK: [[DBG10]] = !DILocation(line: 5, column: 14, scope: [[META11:![0-9]+]])
-; CHECK: [[META11]] = distinct !DISubprogram(name: "bar", scope: [[META1]], file: [[META1]], line: 1, type: [[META12:![0-9]+]], scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META2]])
-; CHECK: [[META12]] = !DISubroutineType(types: [[META2]])
+; CHECK: [[DBG7]] = !DILocation(line: 5, column: 14, scope: [[META8:![0-9]+]])
+; CHECK: [[META8]] = distinct !DISubprogram(name: "bar", scope: [[META1]], file: [[META1]], line: 1, type: [[META9:![0-9]+]], scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META2]])
+; CHECK: [[META9]] = !DISubroutineType(types: [[META2]])
;.
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK-LIMIT: {{.*}}
diff --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll
index 6ff4b96b57556c..ffc6aaa6b3dab2 100644
--- a/llvm/test/Transforms/OpenMP/spmdization.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization.ll
@@ -282,7 +282,7 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; AMDGPU-NEXT: ret void
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
; AMDGPU-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: br label [[COMMON_RET]]
@@ -299,7 +299,7 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; NVPTX-NEXT: ret void
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
+; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
; NVPTX-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: br label [[COMMON_RET]]
@@ -350,7 +350,7 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: user_code.entry:
; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
+; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
@@ -401,7 +401,7 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: user_code.entry:
; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
+; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
@@ -451,7 +451,7 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: user_code.entry:
; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
+; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
@@ -501,7 +501,7 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: user_code.entry:
; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
+; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
@@ -643,10 +643,10 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
@@ -661,10 +661,10 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; NVPTX-NEXT: ret void
; NVPTX: for.body:
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__
; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
@@ -679,10 +679,10 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.
; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: for.body:
-; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__
; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
@@ -697,10 +697,10 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.
; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: for.body:
-; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__
; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
@@ -715,10 +715,10 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.
; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: for.body:
-; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__
; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
@@ -733,10 +733,10 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.
; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: for.body:
-; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
@@ -950,7 +950,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2
; AMDGPU-NEXT: ret void
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: br label [[COMMON_RET]]
@@ -967,7 +967,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2
; NVPTX-NEXT: ret void
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: br label [[COMMON_RET]]
@@ -1018,7 +1018,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: user_code.entry:
; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
@@ -1069,7 +1069,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: user_code.entry:
; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
@@ -1119,7 +1119,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: user_code.entry:
; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
@@ -1169,7 +1169,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: user_code.entry:
; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
@@ -1314,10 +1314,10 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
+; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
@@ -1334,10 +1334,10 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-NEXT: ret void
; NVPTX: for.body:
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
+; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__2
; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
@@ -1355,10 +1355,10 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: for.body:
-; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
+; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__2
; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
@@ -1376,10 +1376,10 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: for.body:
-; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
+; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__2
; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
@@ -1396,10 +1396,10 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: for.body:
-; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
+; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__2
; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
@@ -1416,10 +1416,10 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: for.body:
-; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
+; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
@@ -1641,7 +1641,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l
; AMDGPU-NEXT: ret void
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: br label [[COMMON_RET]]
@@ -1658,7 +1658,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l
; NVPTX-NEXT: ret void
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: br label [[COMMON_RET]]
@@ -1709,7 +1709,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: user_code.entry:
; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
@@ -1760,7 +1760,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: user_code.entry:
; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
@@ -1810,7 +1810,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: user_code.entry:
; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
@@ -1860,7 +1860,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: user_code.entry:
; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
@@ -2002,11 +2002,11 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
-; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]]
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
+; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
+; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__4
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
@@ -2021,11 +2021,11 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-NEXT: ret void
; NVPTX: for.body:
-; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]]
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
+; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
+; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__4
; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
@@ -2040,11 +2040,11 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: for.body:
-; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]]
-; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
+; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
+; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__4
; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
@@ -2059,11 +2059,11 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: for.body:
-; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]]
-; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
+; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
+; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__4
; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
@@ -2078,11 +2078,11 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: for.body:
-; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]]
-; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
+; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
+; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__4
; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
@@ -2097,11 +2097,11 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: for.body:
-; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]]
-; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
+; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
+; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
@@ -2167,54 +2167,54 @@ define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
+; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
+; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5
; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; AMDGPU-DISABLED1-NEXT: entry:
-; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; AMDGPU-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-DISABLED1-NEXT: ret void
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5
; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; AMDGPU-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-DISABLED2-NEXT: ret void
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5
; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; NVPTX-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-DISABLED1-NEXT: ret void
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5
; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; NVPTX-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-DISABLED2-NEXT: ret void
;
@@ -2252,7 +2252,7 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
+; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; AMDGPU-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
@@ -2264,7 +2264,7 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
+; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; NVPTX-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
@@ -2276,7 +2276,7 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
+; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: ret void
;
@@ -2288,7 +2288,7 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
+; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: ret void
;
@@ -2300,7 +2300,7 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
+; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: ret void
;
@@ -2312,7 +2312,7 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
+; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: ret void
;
@@ -2365,7 +2365,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g
; AMDGPU-NEXT: ret void
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: br label [[COMMON_RET]]
@@ -2382,7 +2382,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g
; NVPTX-NEXT: ret void
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: br label [[COMMON_RET]]
@@ -2433,7 +2433,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: user_code.entry:
; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
@@ -2484,7 +2484,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: user_code.entry:
; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
@@ -2534,7 +2534,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: user_code.entry:
; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
@@ -2584,7 +2584,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: user_code.entry:
; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
@@ -2723,7 +2723,7 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
; AMDGPU-NEXT: br i1 [[TMP1]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
; AMDGPU: region.guarded:
-; AMDGPU-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA18]]
+; AMDGPU-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: br label [[REGION_GUARDED_END:%.*]]
; AMDGPU: region.guarded.end:
; AMDGPU-NEXT: br label [[REGION_BARRIER]]
@@ -2740,11 +2740,11 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
-; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]]
-; AMDGPU-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
+; AMDGPU-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
+; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__6
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
@@ -2756,7 +2756,7 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %
; NVPTX-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
; NVPTX-NEXT: br i1 [[TMP1]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
; NVPTX: region.guarded:
-; NVPTX-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA18]]
+; NVPTX-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: br label [[REGION_GUARDED_END:%.*]]
; NVPTX: region.guarded.end:
; NVPTX-NEXT: br label [[REGION_BARRIER]]
@@ -2773,17 +2773,17 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-NEXT: ret void
; NVPTX: for.body:
-; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]]
-; NVPTX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
+; NVPTX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
+; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__6
; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED1-NEXT: entry:
; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
-; AMDGPU-DISABLED1-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED1-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
; AMDGPU-DISABLED1: for.cond:
; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -2793,17 +2793,17 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: for.body:
-; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]]
-; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
+; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
+; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__6
; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED2-NEXT: entry:
; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
-; AMDGPU-DISABLED2-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED2-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
; AMDGPU-DISABLED2: for.cond:
; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -2813,17 +2813,17 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: for.body:
-; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]]
-; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
+; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
+; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__6
; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED1-NEXT: entry:
; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
-; NVPTX-DISABLED1-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED1-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
; NVPTX-DISABLED1: for.cond:
; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -2833,17 +2833,17 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: for.body:
-; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]]
-; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
+; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
+; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__6
; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED2-NEXT: entry:
; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
-; NVPTX-DISABLED2-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED2-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
; NVPTX-DISABLED2: for.cond:
; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -2853,11 +2853,11 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: for.body:
-; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]]
-; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
+; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
+; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
@@ -2926,54 +2926,54 @@ define internal void @__omp_outlined__7(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
+; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
+; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7
; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; AMDGPU-DISABLED1-NEXT: entry:
-; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; AMDGPU-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
; AMDGPU-DISABLED1-NEXT: ret void
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7
; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; AMDGPU-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
; AMDGPU-DISABLED2-NEXT: ret void
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7
; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; NVPTX-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
; NVPTX-DISABLED1-NEXT: ret void
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7
; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; NVPTX-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
; NVPTX-DISABLED2-NEXT: ret void
;
@@ -3011,7 +3011,7 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
+; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; AMDGPU-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
@@ -3023,7 +3023,7 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
+; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; NVPTX-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
@@ -3035,7 +3035,7 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
+; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: ret void
;
@@ -3047,7 +3047,7 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
+; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: ret void
;
@@ -3059,7 +3059,7 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
+; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: ret void
;
@@ -3071,7 +3071,7 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
+; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: ret void
;
@@ -4404,7 +4404,7 @@ attributes #11 = { convergent }
; NVPTX-DISABLED: [[LOOP28]] = distinct !{!28, !23, !24}
; NVPTX-DISABLED: [[LOOP29]] = distinct !{!29, !23, !24}
;.
-; AMDGPU: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
+; AMDGPU: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" "nvvm.kernel" }
; AMDGPU: attributes #[[ATTR1]] = { norecurse }
; AMDGPU: attributes #[[ATTR2]] = { convergent norecurse nounwind }
; AMDGPU: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
@@ -4417,7 +4417,7 @@ attributes #11 = { convergent }
; AMDGPU: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
; AMDGPU: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
;.
-; NVPTX: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
+; NVPTX: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" "nvvm.kernel" }
; NVPTX: attributes #[[ATTR1]] = { norecurse }
; NVPTX: attributes #[[ATTR2]] = { convergent norecurse nounwind }
; NVPTX: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
@@ -4430,7 +4430,7 @@ attributes #11 = { convergent }
; NVPTX: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
; NVPTX: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
;.
-; AMDGPU-DISABLED1: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
+; AMDGPU-DISABLED1: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" "nvvm.kernel" }
; AMDGPU-DISABLED1: attributes #[[ATTR1]] = { norecurse }
; AMDGPU-DISABLED1: attributes #[[ATTR2]] = { convergent norecurse nounwind }
; AMDGPU-DISABLED1: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
@@ -4443,7 +4443,7 @@ attributes #11 = { convergent }
; AMDGPU-DISABLED1: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
; AMDGPU-DISABLED1: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
;.
-; AMDGPU-DISABLED2: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
+; AMDGPU-DISABLED2: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" "nvvm.kernel" }
; AMDGPU-DISABLED2: attributes #[[ATTR1]] = { norecurse }
; AMDGPU-DISABLED2: attributes #[[ATTR2]] = { convergent norecurse nounwind }
; AMDGPU-DISABLED2: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
@@ -4456,7 +4456,7 @@ attributes #11 = { convergent }
; AMDGPU-DISABLED2: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
; AMDGPU-DISABLED2: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
;.
-; NVPTX-DISABLED1: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
+; NVPTX-DISABLED1: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" "nvvm.kernel" }
; NVPTX-DISABLED1: attributes #[[ATTR1]] = { norecurse }
; NVPTX-DISABLED1: attributes #[[ATTR2]] = { convergent norecurse nounwind }
; NVPTX-DISABLED1: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
@@ -4469,7 +4469,7 @@ attributes #11 = { convergent }
; NVPTX-DISABLED1: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
; NVPTX-DISABLED1: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
;.
-; NVPTX-DISABLED2: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
+; NVPTX-DISABLED2: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" "nvvm.kernel" }
; NVPTX-DISABLED2: attributes #[[ATTR1]] = { norecurse }
; NVPTX-DISABLED2: attributes #[[ATTR2]] = { convergent norecurse nounwind }
; NVPTX-DISABLED2: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
@@ -4488,30 +4488,24 @@ attributes #11 = { convergent }
; AMDGPU: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; AMDGPU: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; AMDGPU: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
-; AMDGPU: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
-; AMDGPU: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
-; AMDGPU: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
-; AMDGPU: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
-; AMDGPU: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
-; AMDGPU: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
-; AMDGPU: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; AMDGPU: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; AMDGPU: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; AMDGPU: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; AMDGPU: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; AMDGPU: [[META17:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-; AMDGPU: [[TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0}
-; AMDGPU: [[META19]] = !{!"int", [[META20:![0-9]+]], i64 0}
-; AMDGPU: [[META20]] = !{!"omnipotent char", [[META21:![0-9]+]], i64 0}
-; AMDGPU: [[META21]] = !{!"Simple C/C++ TBAA"}
-; AMDGPU: [[LOOP22]] = distinct !{[[LOOP22]], [[META23:![0-9]+]], [[META24:![0-9]+]]}
-; AMDGPU: [[META23]] = !{!"llvm.loop.mustprogress"}
-; AMDGPU: [[META24]] = !{!"llvm.loop.unroll.disable"}
-; AMDGPU: [[LOOP25]] = distinct !{[[LOOP25]], [[META23]], [[META24]]}
-; AMDGPU: [[TBAA26]] = !{[[META27:![0-9]+]], [[META27]], i64 0}
-; AMDGPU: [[META27]] = !{!"any pointer", [[META20]], i64 0}
-; AMDGPU: [[LOOP28]] = distinct !{[[LOOP28]], [[META23]], [[META24]]}
-; AMDGPU: [[LOOP29]] = distinct !{[[LOOP29]], [[META23]], [[META24]]}
+; AMDGPU: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; AMDGPU: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; AMDGPU: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; AMDGPU: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
+; AMDGPU: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
+; AMDGPU: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+; AMDGPU: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
+; AMDGPU: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
+; AMDGPU: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
+; AMDGPU: [[META15]] = !{!"Simple C/C++ TBAA"}
+; AMDGPU: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
+; AMDGPU: [[META17]] = !{!"llvm.loop.mustprogress"}
+; AMDGPU: [[META18]] = !{!"llvm.loop.unroll.disable"}
+; AMDGPU: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
+; AMDGPU: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
+; AMDGPU: [[META21]] = !{!"any pointer", [[META14]], i64 0}
+; AMDGPU: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
+; AMDGPU: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
;.
; NVPTX: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; NVPTX: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
@@ -4519,30 +4513,24 @@ attributes #11 = { convergent }
; NVPTX: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; NVPTX: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; NVPTX: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
-; NVPTX: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
-; NVPTX: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
-; NVPTX: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
-; NVPTX: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
-; NVPTX: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
-; NVPTX: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
-; NVPTX: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; NVPTX: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; NVPTX: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; NVPTX: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; NVPTX: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; NVPTX: [[META17:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-; NVPTX: [[TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0}
-; NVPTX: [[META19]] = !{!"int", [[META20:![0-9]+]], i64 0}
-; NVPTX: [[META20]] = !{!"omnipotent char", [[META21:![0-9]+]], i64 0}
-; NVPTX: [[META21]] = !{!"Simple C/C++ TBAA"}
-; NVPTX: [[LOOP22]] = distinct !{[[LOOP22]], [[META23:![0-9]+]], [[META24:![0-9]+]]}
-; NVPTX: [[META23]] = !{!"llvm.loop.mustprogress"}
-; NVPTX: [[META24]] = !{!"llvm.loop.unroll.disable"}
-; NVPTX: [[LOOP25]] = distinct !{[[LOOP25]], [[META23]], [[META24]]}
-; NVPTX: [[TBAA26]] = !{[[META27:![0-9]+]], [[META27]], i64 0}
-; NVPTX: [[META27]] = !{!"any pointer", [[META20]], i64 0}
-; NVPTX: [[LOOP28]] = distinct !{[[LOOP28]], [[META23]], [[META24]]}
-; NVPTX: [[LOOP29]] = distinct !{[[LOOP29]], [[META23]], [[META24]]}
+; NVPTX: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; NVPTX: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; NVPTX: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; NVPTX: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
+; NVPTX: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
+; NVPTX: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+; NVPTX: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
+; NVPTX: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
+; NVPTX: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
+; NVPTX: [[META15]] = !{!"Simple C/C++ TBAA"}
+; NVPTX: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
+; NVPTX: [[META17]] = !{!"llvm.loop.mustprogress"}
+; NVPTX: [[META18]] = !{!"llvm.loop.unroll.disable"}
+; NVPTX: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
+; NVPTX: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
+; NVPTX: [[META21]] = !{!"any pointer", [[META14]], i64 0}
+; NVPTX: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
+; NVPTX: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
;.
; AMDGPU-DISABLED1: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; AMDGPU-DISABLED1: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
@@ -4550,30 +4538,24 @@ attributes #11 = { convergent }
; AMDGPU-DISABLED1: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; AMDGPU-DISABLED1: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; AMDGPU-DISABLED1: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
-; AMDGPU-DISABLED1: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
-; AMDGPU-DISABLED1: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
-; AMDGPU-DISABLED1: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
-; AMDGPU-DISABLED1: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
-; AMDGPU-DISABLED1: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
-; AMDGPU-DISABLED1: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
-; AMDGPU-DISABLED1: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; AMDGPU-DISABLED1: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; AMDGPU-DISABLED1: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; AMDGPU-DISABLED1: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; AMDGPU-DISABLED1: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; AMDGPU-DISABLED1: [[META17:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-; AMDGPU-DISABLED1: [[TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0}
-; AMDGPU-DISABLED1: [[META19]] = !{!"int", [[META20:![0-9]+]], i64 0}
-; AMDGPU-DISABLED1: [[META20]] = !{!"omnipotent char", [[META21:![0-9]+]], i64 0}
-; AMDGPU-DISABLED1: [[META21]] = !{!"Simple C/C++ TBAA"}
-; AMDGPU-DISABLED1: [[LOOP22]] = distinct !{[[LOOP22]], [[META23:![0-9]+]], [[META24:![0-9]+]]}
-; AMDGPU-DISABLED1: [[META23]] = !{!"llvm.loop.mustprogress"}
-; AMDGPU-DISABLED1: [[META24]] = !{!"llvm.loop.unroll.disable"}
-; AMDGPU-DISABLED1: [[LOOP25]] = distinct !{[[LOOP25]], [[META23]], [[META24]]}
-; AMDGPU-DISABLED1: [[TBAA26]] = !{[[META27:![0-9]+]], [[META27]], i64 0}
-; AMDGPU-DISABLED1: [[META27]] = !{!"any pointer", [[META20]], i64 0}
-; AMDGPU-DISABLED1: [[LOOP28]] = distinct !{[[LOOP28]], [[META23]], [[META24]]}
-; AMDGPU-DISABLED1: [[LOOP29]] = distinct !{[[LOOP29]], [[META23]], [[META24]]}
+; AMDGPU-DISABLED1: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; AMDGPU-DISABLED1: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; AMDGPU-DISABLED1: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; AMDGPU-DISABLED1: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
+; AMDGPU-DISABLED1: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
+; AMDGPU-DISABLED1: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+; AMDGPU-DISABLED1: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
+; AMDGPU-DISABLED1: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
+; AMDGPU-DISABLED1: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
+; AMDGPU-DISABLED1: [[META15]] = !{!"Simple C/C++ TBAA"}
+; AMDGPU-DISABLED1: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
+; AMDGPU-DISABLED1: [[META17]] = !{!"llvm.loop.mustprogress"}
+; AMDGPU-DISABLED1: [[META18]] = !{!"llvm.loop.unroll.disable"}
+; AMDGPU-DISABLED1: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
+; AMDGPU-DISABLED1: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
+; AMDGPU-DISABLED1: [[META21]] = !{!"any pointer", [[META14]], i64 0}
+; AMDGPU-DISABLED1: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
+; AMDGPU-DISABLED1: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
;.
; AMDGPU-DISABLED2: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; AMDGPU-DISABLED2: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
@@ -4581,30 +4563,24 @@ attributes #11 = { convergent }
; AMDGPU-DISABLED2: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; AMDGPU-DISABLED2: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; AMDGPU-DISABLED2: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
-; AMDGPU-DISABLED2: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
-; AMDGPU-DISABLED2: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
-; AMDGPU-DISABLED2: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
-; AMDGPU-DISABLED2: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
-; AMDGPU-DISABLED2: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
-; AMDGPU-DISABLED2: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
-; AMDGPU-DISABLED2: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; AMDGPU-DISABLED2: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; AMDGPU-DISABLED2: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; AMDGPU-DISABLED2: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; AMDGPU-DISABLED2: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; AMDGPU-DISABLED2: [[META17:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-; AMDGPU-DISABLED2: [[TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0}
-; AMDGPU-DISABLED2: [[META19]] = !{!"int", [[META20:![0-9]+]], i64 0}
-; AMDGPU-DISABLED2: [[META20]] = !{!"omnipotent char", [[META21:![0-9]+]], i64 0}
-; AMDGPU-DISABLED2: [[META21]] = !{!"Simple C/C++ TBAA"}
-; AMDGPU-DISABLED2: [[LOOP22]] = distinct !{[[LOOP22]], [[META23:![0-9]+]], [[META24:![0-9]+]]}
-; AMDGPU-DISABLED2: [[META23]] = !{!"llvm.loop.mustprogress"}
-; AMDGPU-DISABLED2: [[META24]] = !{!"llvm.loop.unroll.disable"}
-; AMDGPU-DISABLED2: [[LOOP25]] = distinct !{[[LOOP25]], [[META23]], [[META24]]}
-; AMDGPU-DISABLED2: [[TBAA26]] = !{[[META27:![0-9]+]], [[META27]], i64 0}
-; AMDGPU-DISABLED2: [[META27]] = !{!"any pointer", [[META20]], i64 0}
-; AMDGPU-DISABLED2: [[LOOP28]] = distinct !{[[LOOP28]], [[META23]], [[META24]]}
-; AMDGPU-DISABLED2: [[LOOP29]] = distinct !{[[LOOP29]], [[META23]], [[META24]]}
+; AMDGPU-DISABLED2: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; AMDGPU-DISABLED2: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; AMDGPU-DISABLED2: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; AMDGPU-DISABLED2: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
+; AMDGPU-DISABLED2: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
+; AMDGPU-DISABLED2: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+; AMDGPU-DISABLED2: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
+; AMDGPU-DISABLED2: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
+; AMDGPU-DISABLED2: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
+; AMDGPU-DISABLED2: [[META15]] = !{!"Simple C/C++ TBAA"}
+; AMDGPU-DISABLED2: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
+; AMDGPU-DISABLED2: [[META17]] = !{!"llvm.loop.mustprogress"}
+; AMDGPU-DISABLED2: [[META18]] = !{!"llvm.loop.unroll.disable"}
+; AMDGPU-DISABLED2: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
+; AMDGPU-DISABLED2: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
+; AMDGPU-DISABLED2: [[META21]] = !{!"any pointer", [[META14]], i64 0}
+; AMDGPU-DISABLED2: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
+; AMDGPU-DISABLED2: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
;.
; NVPTX-DISABLED1: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; NVPTX-DISABLED1: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
@@ -4612,30 +4588,24 @@ attributes #11 = { convergent }
; NVPTX-DISABLED1: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; NVPTX-DISABLED1: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; NVPTX-DISABLED1: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
-; NVPTX-DISABLED1: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
-; NVPTX-DISABLED1: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
-; NVPTX-DISABLED1: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
-; NVPTX-DISABLED1: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
-; NVPTX-DISABLED1: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
-; NVPTX-DISABLED1: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
-; NVPTX-DISABLED1: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; NVPTX-DISABLED1: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; NVPTX-DISABLED1: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; NVPTX-DISABLED1: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; NVPTX-DISABLED1: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; NVPTX-DISABLED1: [[META17:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-; NVPTX-DISABLED1: [[TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0}
-; NVPTX-DISABLED1: [[META19]] = !{!"int", [[META20:![0-9]+]], i64 0}
-; NVPTX-DISABLED1: [[META20]] = !{!"omnipotent char", [[META21:![0-9]+]], i64 0}
-; NVPTX-DISABLED1: [[META21]] = !{!"Simple C/C++ TBAA"}
-; NVPTX-DISABLED1: [[LOOP22]] = distinct !{[[LOOP22]], [[META23:![0-9]+]], [[META24:![0-9]+]]}
-; NVPTX-DISABLED1: [[META23]] = !{!"llvm.loop.mustprogress"}
-; NVPTX-DISABLED1: [[META24]] = !{!"llvm.loop.unroll.disable"}
-; NVPTX-DISABLED1: [[LOOP25]] = distinct !{[[LOOP25]], [[META23]], [[META24]]}
-; NVPTX-DISABLED1: [[TBAA26]] = !{[[META27:![0-9]+]], [[META27]], i64 0}
-; NVPTX-DISABLED1: [[META27]] = !{!"any pointer", [[META20]], i64 0}
-; NVPTX-DISABLED1: [[LOOP28]] = distinct !{[[LOOP28]], [[META23]], [[META24]]}
-; NVPTX-DISABLED1: [[LOOP29]] = distinct !{[[LOOP29]], [[META23]], [[META24]]}
+; NVPTX-DISABLED1: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; NVPTX-DISABLED1: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; NVPTX-DISABLED1: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; NVPTX-DISABLED1: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
+; NVPTX-DISABLED1: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
+; NVPTX-DISABLED1: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+; NVPTX-DISABLED1: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
+; NVPTX-DISABLED1: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
+; NVPTX-DISABLED1: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
+; NVPTX-DISABLED1: [[META15]] = !{!"Simple C/C++ TBAA"}
+; NVPTX-DISABLED1: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
+; NVPTX-DISABLED1: [[META17]] = !{!"llvm.loop.mustprogress"}
+; NVPTX-DISABLED1: [[META18]] = !{!"llvm.loop.unroll.disable"}
+; NVPTX-DISABLED1: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
+; NVPTX-DISABLED1: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
+; NVPTX-DISABLED1: [[META21]] = !{!"any pointer", [[META14]], i64 0}
+; NVPTX-DISABLED1: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
+; NVPTX-DISABLED1: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
;.
; NVPTX-DISABLED2: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; NVPTX-DISABLED2: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
@@ -4643,28 +4613,22 @@ attributes #11 = { convergent }
; NVPTX-DISABLED2: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; NVPTX-DISABLED2: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; NVPTX-DISABLED2: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
-; NVPTX-DISABLED2: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
-; NVPTX-DISABLED2: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
-; NVPTX-DISABLED2: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
-; NVPTX-DISABLED2: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
-; NVPTX-DISABLED2: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
-; NVPTX-DISABLED2: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
-; NVPTX-DISABLED2: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; NVPTX-DISABLED2: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; NVPTX-DISABLED2: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; NVPTX-DISABLED2: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; NVPTX-DISABLED2: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; NVPTX-DISABLED2: [[META17:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-; NVPTX-DISABLED2: [[TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0}
-; NVPTX-DISABLED2: [[META19]] = !{!"int", [[META20:![0-9]+]], i64 0}
-; NVPTX-DISABLED2: [[META20]] = !{!"omnipotent char", [[META21:![0-9]+]], i64 0}
-; NVPTX-DISABLED2: [[META21]] = !{!"Simple C/C++ TBAA"}
-; NVPTX-DISABLED2: [[LOOP22]] = distinct !{[[LOOP22]], [[META23:![0-9]+]], [[META24:![0-9]+]]}
-; NVPTX-DISABLED2: [[META23]] = !{!"llvm.loop.mustprogress"}
-; NVPTX-DISABLED2: [[META24]] = !{!"llvm.loop.unroll.disable"}
-; NVPTX-DISABLED2: [[LOOP25]] = distinct !{[[LOOP25]], [[META23]], [[META24]]}
-; NVPTX-DISABLED2: [[TBAA26]] = !{[[META27:![0-9]+]], [[META27]], i64 0}
-; NVPTX-DISABLED2: [[META27]] = !{!"any pointer", [[META20]], i64 0}
-; NVPTX-DISABLED2: [[LOOP28]] = distinct !{[[LOOP28]], [[META23]], [[META24]]}
-; NVPTX-DISABLED2: [[LOOP29]] = distinct !{[[LOOP29]], [[META23]], [[META24]]}
+; NVPTX-DISABLED2: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; NVPTX-DISABLED2: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; NVPTX-DISABLED2: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; NVPTX-DISABLED2: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
+; NVPTX-DISABLED2: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
+; NVPTX-DISABLED2: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+; NVPTX-DISABLED2: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
+; NVPTX-DISABLED2: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
+; NVPTX-DISABLED2: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
+; NVPTX-DISABLED2: [[META15]] = !{!"Simple C/C++ TBAA"}
+; NVPTX-DISABLED2: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
+; NVPTX-DISABLED2: [[META17]] = !{!"llvm.loop.mustprogress"}
+; NVPTX-DISABLED2: [[META18]] = !{!"llvm.loop.unroll.disable"}
+; NVPTX-DISABLED2: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
+; NVPTX-DISABLED2: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
+; NVPTX-DISABLED2: [[META21]] = !{!"any pointer", [[META14]], i64 0}
+; NVPTX-DISABLED2: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
+; NVPTX-DISABLED2: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
;.
diff --git a/llvm/test/Transforms/OpenMP/spmdization_assumes.ll b/llvm/test/Transforms/OpenMP/spmdization_assumes.ll
index 2f43a4e4286a23..c09b571221d7aa 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_assumes.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_assumes.ll
@@ -47,7 +47,7 @@ define weak void @__omp_offloading_fd02_404433c2_main_l5(ptr %dyn, ptr nonnull a
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
; CHECK: region.guarded:
-; CHECK-NEXT: store double [[CALL_I]], ptr [[X]], align 8, !tbaa [[TBAA8:![0-9]+]]
+; CHECK-NEXT: store double [[CALL_I]], ptr [[X]], align 8, !tbaa [[TBAA7:![0-9]+]]
; CHECK-NEXT: br label [[REGION_GUARDED_END:%.*]]
; CHECK: region.guarded.end:
; CHECK-NEXT: br label [[REGION_BARRIER]]
@@ -144,7 +144,7 @@ attributes #6 = { convergent nounwind "llvm.assume"="ompx_spmd_amenable" }
!10 = !{!"omnipotent char", !11, i64 0}
!11 = !{!"Simple C/C++ TBAA"}
;.
-; CHECK: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "frame-pointer"="all" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+; CHECK: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "frame-pointer"="all" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "nvvm.kernel" "stack-protector-buffer-size"="8" }
; CHECK: attributes #[[ATTR1]] = { alwaysinline mustprogress nofree norecurse nosync nounwind willreturn memory(none) "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
; CHECK: attributes #[[ATTR2]] = { norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
; CHECK: attributes #[[ATTR3]] = { nounwind }
@@ -154,15 +154,14 @@ attributes #6 = { convergent nounwind "llvm.assume"="ompx_spmd_amenable" }
; CHECK: attributes #[[ATTR7]] = { convergent nounwind "llvm.assume"="ompx_spmd_amenable" }
;.
; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 1078211522, !"main", i32 5, i32 0}
-; CHECK: [[META1:![0-9]+]] = !{ptr @__omp_offloading_fd02_404433c2_main_l5, !"kernel", i32 1}
-; CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK: [[META5:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; CHECK: [[META6:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; CHECK: [[META7:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-; CHECK: [[TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0}
-; CHECK: [[META9]] = !{!"double", [[META10:![0-9]+]], i64 0}
-; CHECK: [[META10]] = !{!"omnipotent char", [[META11:![0-9]+]], i64 0}
-; CHECK: [[META11]] = !{!"Simple C/C++ TBAA"}
+; CHECK: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; CHECK: [[META2:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; CHECK: [[META4:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
+; CHECK: [[META5:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
+; CHECK: [[META6:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+; CHECK: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0}
+; CHECK: [[META8]] = !{!"double", [[META9:![0-9]+]], i64 0}
+; CHECK: [[META9]] = !{!"omnipotent char", [[META10:![0-9]+]], i64 0}
+; CHECK: [[META10]] = !{!"Simple C/C++ TBAA"}
;.
diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
index 229a49d7845593..606c9bd29c0d02 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
@@ -84,9 +84,9 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %dyn, ptr %x
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
; CHECK: region.guarded:
-; CHECK-NEXT: store i32 0, ptr [[X]], align 4, !noalias [[META8:![0-9]+]]
-; CHECK-NEXT: store i32 1, ptr [[ARRAYIDX1_I]], align 4, !noalias [[META8]]
-; CHECK-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr [[ARRAYIDX2_I]], align 4, !noalias [[META8]]
+; CHECK-NEXT: store i32 0, ptr [[X]], align 4, !noalias [[META7:![0-9]+]]
+; CHECK-NEXT: store i32 1, ptr [[ARRAYIDX1_I]], align 4, !noalias [[META7]]
+; CHECK-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr [[ARRAYIDX2_I]], align 4, !noalias [[META7]]
; CHECK-NEXT: br label [[REGION_GUARDED_END:%.*]]
; CHECK: region.guarded.end:
; CHECK-NEXT: br label [[REGION_BARRIER]]
@@ -111,7 +111,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %dyn, ptr %x
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
; CHECK-NEXT: br i1 [[TMP5]], label [[REGION_GUARDED4:%.*]], label [[REGION_BARRIER2:%.*]]
; CHECK: region.guarded4:
-; CHECK-NEXT: store i32 [[SUB3_I]], ptr [[ARRAYIDX5_I]], align 4, !noalias [[META8]]
+; CHECK-NEXT: store i32 [[SUB3_I]], ptr [[ARRAYIDX5_I]], align 4, !noalias [[META7]]
; CHECK-NEXT: br label [[REGION_GUARDED_END1:%.*]]
; CHECK: region.guarded.end1:
; CHECK-NEXT: br label [[REGION_BARRIER2]]
@@ -120,10 +120,10 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %dyn, ptr %x
; CHECK-NEXT: br label [[REGION_EXIT3]]
; CHECK: region.exit3:
; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1
-; CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: __omp_outlined__.exit:
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr null, i32 0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr null, i64 0)
-; CHECK-NEXT: [[CALL_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10:[0-9]+]], !noalias [[META8]]
+; CHECK-NEXT: [[CALL_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10:[0-9]+]], !noalias [[META7]]
; CHECK-NEXT: [[IDXPROM6_I:%.*]] = sext i32 [[CALL_I]] to i64
; CHECK-NEXT: [[ARRAYIDX7_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM6_I]]
; CHECK-NEXT: br label [[REGION_CHECK_TID10:%.*]]
@@ -132,7 +132,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %dyn, ptr %x
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0
; CHECK-NEXT: br i1 [[TMP7]], label [[REGION_GUARDED9:%.*]], label [[REGION_BARRIER7:%.*]]
; CHECK: region.guarded9:
-; CHECK-NEXT: store i32 [[CALL_I]], ptr [[ARRAYIDX7_I]], align 4, !noalias [[META8]]
+; CHECK-NEXT: store i32 [[CALL_I]], ptr [[ARRAYIDX7_I]], align 4, !noalias [[META7]]
; CHECK-NEXT: br label [[REGION_GUARDED_END6:%.*]]
; CHECK: region.guarded.end6:
; CHECK-NEXT: br label [[REGION_BARRIER7]]
@@ -140,7 +140,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %dyn, ptr %x
; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP6]])
; CHECK-NEXT: br label [[REGION_EXIT8:%.*]]
; CHECK: region.exit8:
-; CHECK-NEXT: [[CALL8_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META8]]
+; CHECK-NEXT: [[CALL8_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
; CHECK-NEXT: [[IDXPROM9_I:%.*]] = sext i32 [[CALL8_I]] to i64
; CHECK-NEXT: [[ARRAYIDX10_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM9_I]]
; CHECK-NEXT: br label [[REGION_CHECK_TID15:%.*]]
@@ -149,7 +149,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %dyn, ptr %x
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
; CHECK-NEXT: br i1 [[TMP9]], label [[REGION_GUARDED14:%.*]], label [[REGION_BARRIER12:%.*]]
; CHECK: region.guarded14:
-; CHECK-NEXT: store i32 [[CALL8_I]], ptr [[ARRAYIDX10_I]], align 4, !noalias [[META8]]
+; CHECK-NEXT: store i32 [[CALL8_I]], ptr [[ARRAYIDX10_I]], align 4, !noalias [[META7]]
; CHECK-NEXT: br label [[REGION_GUARDED_END11:%.*]]
; CHECK: region.guarded.end11:
; CHECK-NEXT: br label [[REGION_BARRIER12]]
@@ -157,7 +157,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %dyn, ptr %x
; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP8]])
; CHECK-NEXT: br label [[REGION_EXIT13:%.*]]
; CHECK: region.exit13:
-; CHECK-NEXT: [[CALL11_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META8]]
+; CHECK-NEXT: [[CALL11_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
; CHECK-NEXT: [[IDXPROM12_I:%.*]] = sext i32 [[CALL11_I]] to i64
; CHECK-NEXT: [[ARRAYIDX13_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM12_I]]
; CHECK-NEXT: br label [[REGION_CHECK_TID20:%.*]]
@@ -166,7 +166,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %dyn, ptr %x
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 0
; CHECK-NEXT: br i1 [[TMP11]], label [[REGION_GUARDED19:%.*]], label [[REGION_BARRIER17:%.*]]
; CHECK: region.guarded19:
-; CHECK-NEXT: store i32 [[CALL11_I]], ptr [[ARRAYIDX13_I]], align 4, !noalias [[META8]]
+; CHECK-NEXT: store i32 [[CALL11_I]], ptr [[ARRAYIDX13_I]], align 4, !noalias [[META7]]
; CHECK-NEXT: br label [[REGION_GUARDED_END16:%.*]]
; CHECK: region.guarded.end16:
; CHECK-NEXT: br label [[REGION_BARRIER17]]
@@ -174,9 +174,9 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %dyn, ptr %x
; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP10]])
; CHECK-NEXT: br label [[REGION_EXIT18:%.*]]
; CHECK: region.exit18:
-; CHECK-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META8]]
-; CHECK-NEXT: [[CALL15_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META8]]
-; CHECK-NEXT: [[CALL16_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META8]]
+; CHECK-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
+; CHECK-NEXT: [[CALL15_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
+; CHECK-NEXT: [[CALL16_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
; CHECK-NEXT: call void @__kmpc_target_deinit() #[[ATTR6]]
; CHECK-NEXT: ret void
; CHECK: worker.exit:
@@ -230,13 +230,13 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %dyn, ptr %x
; CHECK-DISABLED-NEXT: [[SELECT:%.*]] = select i1 [[C]], ptr [[AL32]], ptr addrspacecast (ptr addrspace(5) @LocGlob to ptr)
; CHECK-DISABLED-NEXT: store ptr [[SELECT]], ptr [[LOC]], align 8
; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR6]]
-; CHECK-DISABLED-NEXT: store i32 0, ptr [[X]], align 4, !noalias [[META8:![0-9]+]]
+; CHECK-DISABLED-NEXT: store i32 0, ptr [[X]], align 4, !noalias [[META7:![0-9]+]]
; CHECK-DISABLED-NEXT: [[ARRAYIDX1_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 1
-; CHECK-DISABLED-NEXT: store i32 1, ptr [[ARRAYIDX1_I]], align 4, !noalias [[META8]]
+; CHECK-DISABLED-NEXT: store i32 1, ptr [[ARRAYIDX1_I]], align 4, !noalias [[META7]]
; CHECK-DISABLED-NEXT: [[SEXT:%.*]] = shl i64 [[N]], 32
; CHECK-DISABLED-NEXT: [[IDXPROM_I:%.*]] = ashr exact i64 [[SEXT]], 32
; CHECK-DISABLED-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM_I]]
-; CHECK-DISABLED-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr [[ARRAYIDX2_I]], align 4, !noalias [[META8]]
+; CHECK-DISABLED-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr [[ARRAYIDX2_I]], align 4, !noalias [[META7]]
; CHECK-DISABLED-NEXT: call void @usei8ptr(ptr nocapture [[HEAP2STACK_H2S]]) #[[ATTR9:[0-9]+]]
; CHECK-DISABLED-NEXT: br label [[FOR_COND_I:%.*]]
; CHECK-DISABLED: for.cond.i:
@@ -248,26 +248,26 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %dyn, ptr %x
; CHECK-DISABLED-NEXT: [[SUB3_I:%.*]] = add nsw i32 [[I_0_I]], -1
; CHECK-DISABLED-NEXT: [[IDXPROM4_I:%.*]] = zext i32 [[I_0_I]] to i64
; CHECK-DISABLED-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM4_I]]
-; CHECK-DISABLED-NEXT: store i32 [[SUB3_I]], ptr [[ARRAYIDX5_I]], align 4, !noalias [[META8]]
+; CHECK-DISABLED-NEXT: store i32 [[SUB3_I]], ptr [[ARRAYIDX5_I]], align 4, !noalias [[META7]]
; CHECK-DISABLED-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1
-; CHECK-DISABLED-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-DISABLED-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK-DISABLED: __omp_outlined__.exit:
; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr null, i32 0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr null, i64 0)
-; CHECK-DISABLED-NEXT: [[CALL_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10:[0-9]+]], !noalias [[META8]]
+; CHECK-DISABLED-NEXT: [[CALL_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10:[0-9]+]], !noalias [[META7]]
; CHECK-DISABLED-NEXT: [[IDXPROM6_I:%.*]] = sext i32 [[CALL_I]] to i64
; CHECK-DISABLED-NEXT: [[ARRAYIDX7_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM6_I]]
-; CHECK-DISABLED-NEXT: store i32 [[CALL_I]], ptr [[ARRAYIDX7_I]], align 4, !noalias [[META8]]
-; CHECK-DISABLED-NEXT: [[CALL8_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META8]]
+; CHECK-DISABLED-NEXT: store i32 [[CALL_I]], ptr [[ARRAYIDX7_I]], align 4, !noalias [[META7]]
+; CHECK-DISABLED-NEXT: [[CALL8_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
; CHECK-DISABLED-NEXT: [[IDXPROM9_I:%.*]] = sext i32 [[CALL8_I]] to i64
; CHECK-DISABLED-NEXT: [[ARRAYIDX10_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM9_I]]
-; CHECK-DISABLED-NEXT: store i32 [[CALL8_I]], ptr [[ARRAYIDX10_I]], align 4, !noalias [[META8]]
-; CHECK-DISABLED-NEXT: [[CALL11_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META8]]
+; CHECK-DISABLED-NEXT: store i32 [[CALL8_I]], ptr [[ARRAYIDX10_I]], align 4, !noalias [[META7]]
+; CHECK-DISABLED-NEXT: [[CALL11_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
; CHECK-DISABLED-NEXT: [[IDXPROM12_I:%.*]] = sext i32 [[CALL11_I]] to i64
; CHECK-DISABLED-NEXT: [[ARRAYIDX13_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM12_I]]
-; CHECK-DISABLED-NEXT: store i32 [[CALL11_I]], ptr [[ARRAYIDX13_I]], align 4, !noalias [[META8]]
-; CHECK-DISABLED-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META8]]
-; CHECK-DISABLED-NEXT: [[CALL15_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META8]]
-; CHECK-DISABLED-NEXT: [[CALL16_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META8]]
+; CHECK-DISABLED-NEXT: store i32 [[CALL11_I]], ptr [[ARRAYIDX13_I]], align 4, !noalias [[META7]]
+; CHECK-DISABLED-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
+; CHECK-DISABLED-NEXT: [[CALL15_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
+; CHECK-DISABLED-NEXT: [[CALL16_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit() #[[ATTR6]]
; CHECK-DISABLED-NEXT: ret void
; CHECK-DISABLED: worker.exit:
@@ -422,7 +422,7 @@ attributes #5 = { convergent nounwind "llvm.assume"="omp_no_openmp,ompx_spmd_ame
!11 = distinct !{!11, !12}
!12 = !{!"llvm.loop.mustprogress"}
;.
-; CHECK: attributes #[[ATTR0]] = { convergent norecurse nounwind "frame-pointer"="all" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
+; CHECK: attributes #[[ATTR0]] = { convergent norecurse nounwind "frame-pointer"="all" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "nvvm.kernel" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
; CHECK: attributes #[[ATTR1:[0-9]+]] = { alwaysinline }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="all" "llvm.assume"="omp_no_openmp,ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="all" "llvm.assume"="ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
@@ -434,7 +434,7 @@ attributes #5 = { convergent nounwind "llvm.assume"="omp_no_openmp,ompx_spmd_ame
; CHECK: attributes #[[ATTR9]] = { nounwind willreturn "llvm.assume"="omp_no_openmp,ompx_spmd_amenable" }
; CHECK: attributes #[[ATTR10]] = { convergent nounwind "llvm.assume"="omp_no_openmp,ompx_spmd_amenable" }
;.
-; CHECK-DISABLED: attributes #[[ATTR0]] = { convergent norecurse nounwind "frame-pointer"="all" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
+; CHECK-DISABLED: attributes #[[ATTR0]] = { convergent norecurse nounwind "frame-pointer"="all" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "nvvm.kernel" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
; CHECK-DISABLED: attributes #[[ATTR1:[0-9]+]] = { alwaysinline }
; CHECK-DISABLED: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="all" "llvm.assume"="omp_no_openmp,ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
; CHECK-DISABLED: attributes #[[ATTR3:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="all" "llvm.assume"="ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
@@ -447,30 +447,28 @@ attributes #5 = { convergent nounwind "llvm.assume"="omp_no_openmp,ompx_spmd_ame
; CHECK-DISABLED: attributes #[[ATTR10]] = { convergent nounwind "llvm.assume"="omp_no_openmp,ompx_spmd_amenable" }
;.
; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 42, i32 16513658, !"sequential_loop", i32 6, i32 0}
-; CHECK: [[META1:![0-9]+]] = !{ptr @__omp_offloading_2a_fbfa7a_sequential_loop_l6, !"kernel", i32 1}
-; CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK: [[META5:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; CHECK: [[META6:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; CHECK: [[META7:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-; CHECK: [[META8]] = !{[[META9:![0-9]+]]}
-; CHECK: [[META9]] = distinct !{[[META9]], [[META10:![0-9]+]], !"__omp_outlined__: %__context"}
-; CHECK: [[META10]] = distinct !{[[META10]], !"__omp_outlined__"}
-; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META12:![0-9]+]]}
-; CHECK: [[META12]] = !{!"llvm.loop.mustprogress"}
+; CHECK: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; CHECK: [[META2:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; CHECK: [[META4:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
+; CHECK: [[META5:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
+; CHECK: [[META6:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+; CHECK: [[META7]] = !{[[META8:![0-9]+]]}
+; CHECK: [[META8]] = distinct !{[[META8]], [[META9:![0-9]+]], !"__omp_outlined__: %__context"}
+; CHECK: [[META9]] = distinct !{[[META9]], !"__omp_outlined__"}
+; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META11:![0-9]+]]}
+; CHECK: [[META11]] = !{!"llvm.loop.mustprogress"}
;.
; CHECK-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 42, i32 16513658, !"sequential_loop", i32 6, i32 0}
-; CHECK-DISABLED: [[META1:![0-9]+]] = !{ptr @__omp_offloading_2a_fbfa7a_sequential_loop_l6, !"kernel", i32 1}
-; CHECK-DISABLED: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; CHECK-DISABLED: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; CHECK-DISABLED: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK-DISABLED: [[META5:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; CHECK-DISABLED: [[META6:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; CHECK-DISABLED: [[META7:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-; CHECK-DISABLED: [[META8]] = !{[[META9:![0-9]+]]}
-; CHECK-DISABLED: [[META9]] = distinct !{[[META9]], [[META10:![0-9]+]], !"__omp_outlined__: %__context"}
-; CHECK-DISABLED: [[META10]] = distinct !{[[META10]], !"__omp_outlined__"}
-; CHECK-DISABLED: [[LOOP11]] = distinct !{[[LOOP11]], [[META12:![0-9]+]]}
-; CHECK-DISABLED: [[META12]] = !{!"llvm.loop.mustprogress"}
+; CHECK-DISABLED: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; CHECK-DISABLED: [[META2:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; CHECK-DISABLED: [[META3:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; CHECK-DISABLED: [[META4:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
+; CHECK-DISABLED: [[META5:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
+; CHECK-DISABLED: [[META6:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+; CHECK-DISABLED: [[META7]] = !{[[META8:![0-9]+]]}
+; CHECK-DISABLED: [[META8]] = distinct !{[[META8]], [[META9:![0-9]+]], !"__omp_outlined__: %__context"}
+; CHECK-DISABLED: [[META9]] = distinct !{[[META9]], !"__omp_outlined__"}
+; CHECK-DISABLED: [[LOOP10]] = distinct !{[[LOOP10]], [[META11:![0-9]+]]}
+; CHECK-DISABLED: [[META11]] = !{!"llvm.loop.mustprogress"}
;.
diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll
index 11405b7eb447c2..6a2ed99bad217e 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll
@@ -336,7 +336,7 @@ attributes #5 = { convergent }
!8 = !{i32 7, !"frame-pointer", i32 2}
!9 = !{!"clang version 14.0.0"}
;.
-; CHECK: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="all" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
+; CHECK: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="all" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "nvvm.kernel" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
; CHECK: attributes #[[ATTR1]] = { noinline nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
; CHECK: attributes #[[ATTR2]] = { noinline norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
; CHECK: attributes #[[ATTR3]] = { nounwind }
@@ -346,7 +346,7 @@ attributes #5 = { convergent }
; CHECK: attributes #[[ATTR7]] = { convergent nosync nounwind memory(write) }
; CHECK: attributes #[[ATTR8]] = { convergent }
;.
-; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="all" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
+; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="all" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "nvvm.kernel" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR1]] = { noinline nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR2]] = { noinline norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR3]] = { nounwind }
@@ -358,23 +358,19 @@ attributes #5 = { convergent }
;.
; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"spmd", i32 12, i32 0}
; CHECK: [[META1:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"generic", i32 20, i32 1}
-; CHECK: [[META2:![0-9]+]] = !{ptr @__omp_offloading_2b_10393b5_spmd_l12, !"kernel", i32 1}
-; CHECK: [[META3:![0-9]+]] = !{ptr @__omp_offloading_2b_10393b5_generic_l20, !"kernel", i32 1}
-; CHECK: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; CHECK: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; CHECK: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK: [[META7:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; CHECK: [[META8:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; CHECK: [[META9:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+; CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; CHECK: [[META5:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
+; CHECK: [[META6:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
+; CHECK: [[META7:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
;.
; CHECK-DISABLE-SPMDIZATION: [[META0:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"spmd", i32 12, i32 0}
; CHECK-DISABLE-SPMDIZATION: [[META1:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"generic", i32 20, i32 1}
-; CHECK-DISABLE-SPMDIZATION: [[META2:![0-9]+]] = !{ptr @__omp_offloading_2b_10393b5_spmd_l12, !"kernel", i32 1}
-; CHECK-DISABLE-SPMDIZATION: [[META3:![0-9]+]] = !{ptr @__omp_offloading_2b_10393b5_generic_l20, !"kernel", i32 1}
-; CHECK-DISABLE-SPMDIZATION: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; CHECK-DISABLE-SPMDIZATION: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; CHECK-DISABLE-SPMDIZATION: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK-DISABLE-SPMDIZATION: [[META7:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; CHECK-DISABLE-SPMDIZATION: [[META8:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; CHECK-DISABLE-SPMDIZATION: [[META9:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+; CHECK-DISABLE-SPMDIZATION: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; CHECK-DISABLE-SPMDIZATION: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; CHECK-DISABLE-SPMDIZATION: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; CHECK-DISABLE-SPMDIZATION: [[META5:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
+; CHECK-DISABLE-SPMDIZATION: [[META6:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
+; CHECK-DISABLE-SPMDIZATION: [[META7:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
;.
diff --git a/llvm/test/Transforms/OpenMP/spmdization_indirect.ll b/llvm/test/Transforms/OpenMP/spmdization_indirect.ll
index f348825446c63d..40749caea89b31 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_indirect.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_indirect.ll
@@ -57,7 +57,7 @@ define internal void @spmd_callees__debug(i1 %c) {
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10:[0-9]+]]
; AMDGPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA17:![0-9]+]]
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
; AMDGPU-NEXT: [[FP:%.*]] = select i1 [[C]], ptr @__omp_outlined_spmd_amenable1, ptr @__omp_outlined_spmd_amenable2
; AMDGPU-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_spmd_amenable2
; AMDGPU-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
@@ -88,7 +88,7 @@ define internal void @spmd_callees__debug(i1 %c) {
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10:[0-9]+]]
; NVPTX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA17:![0-9]+]]
+; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
; NVPTX-NEXT: [[FP:%.*]] = select i1 [[C]], ptr @__omp_outlined_spmd_amenable1, ptr @__omp_outlined_spmd_amenable2
; NVPTX-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_spmd_amenable2
; NVPTX-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
@@ -143,10 +143,10 @@ define internal void @__omp_outlined_spmd_amenable1(ptr noalias %.global_tid., p
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR6:[0-9]+]]
; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA17]]
+; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]]
+; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined_spmd_amenable1
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
@@ -161,10 +161,10 @@ define internal void @__omp_outlined_spmd_amenable1(ptr noalias %.global_tid., p
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR6:[0-9]+]]
; NVPTX-NEXT: ret void
; NVPTX: for.body:
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA17]]
+; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]]
+; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
entry:
%captured_vars_addrs = alloca [0 x ptr], align 8
@@ -262,10 +262,10 @@ define internal void @__omp_outlined_spmd_amenable2(ptr noalias %.global_tid., p
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR6]]
; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA17]]
+; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]]
+; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined_spmd_amenable2
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
@@ -282,10 +282,10 @@ define internal void @__omp_outlined_spmd_amenable2(ptr noalias %.global_tid., p
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR6]]
; NVPTX-NEXT: ret void
; NVPTX: for.body:
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA17]]
+; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]]
+; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
entry:
%captured_vars_addrs = alloca [0 x ptr], align 8
@@ -413,7 +413,7 @@ define weak void @spmd_and_non_spmd_callee(i1 %c) #0 {
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]]
; AMDGPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA17]]
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: [[FP:%.*]] = select i1 [[C]], ptr @__omp_outlined_spmd_amenable3, ptr @__omp_outlined_not_spmd_amenable
; AMDGPU-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_not_spmd_amenable
; AMDGPU-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
@@ -473,7 +473,7 @@ define weak void @spmd_and_non_spmd_callee(i1 %c) #0 {
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]]
; NVPTX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA17]]
+; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: [[FP:%.*]] = select i1 [[C]], ptr @__omp_outlined_spmd_amenable3, ptr @__omp_outlined_not_spmd_amenable
; NVPTX-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_not_spmd_amenable
; NVPTX-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
@@ -530,11 +530,11 @@ define internal void @__omp_outlined_spmd_amenable3(ptr noalias %.global_tid., p
; AMDGPU-NEXT: call void @__kmpc_free_shared(ptr [[X]], i64 4) #[[ATTR10]]
; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
-; AMDGPU-NEXT: store ptr [[X]], ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA25:![0-9]+]]
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA17]]
+; AMDGPU-NEXT: store ptr [[X]], ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
+; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]]
+; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined_spmd_amenable3
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
@@ -551,11 +551,11 @@ define internal void @__omp_outlined_spmd_amenable3(ptr noalias %.global_tid., p
; NVPTX-NEXT: call void @__kmpc_free_shared(ptr [[X]], i64 4) #[[ATTR10]]
; NVPTX-NEXT: ret void
; NVPTX: for.body:
-; NVPTX-NEXT: store ptr [[X]], ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA25:![0-9]+]]
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA17]]
+; NVPTX-NEXT: store ptr [[X]], ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
+; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]]
+; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
entry:
%captured_vars_addrs = alloca [1 x ptr], align 8
@@ -587,18 +587,18 @@ define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA17]]
+; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA17]]
+; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @unknown() #[[ATTR7]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA17]]
+; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA17]]
+; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @unknown() #[[ATTR7]]
; NVPTX-NEXT: ret void
;
@@ -622,7 +622,7 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA25]]
+; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; AMDGPU-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR10]]
; AMDGPU-NEXT: ret void
;
@@ -634,7 +634,7 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA25]]
+; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; NVPTX-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR10]]
; NVPTX-NEXT: ret void
;
@@ -668,7 +668,7 @@ define weak void @spmd_callees_metadata(ptr %fp) #0 {
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]]
; AMDGPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA17]]
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @__omp_outlined_spmd_amenable_external(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]])
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: br label [[COMMON_RET]]
@@ -686,7 +686,7 @@ define weak void @spmd_callees_metadata(ptr %fp) #0 {
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]]
; NVPTX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA17]]
+; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @__omp_outlined_spmd_amenable_external(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]])
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: br label [[COMMON_RET]]
@@ -757,7 +757,7 @@ define weak void @spmd_and_non_spmd_callees_metadata(ptr %fp) #0 {
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]]
; AMDGPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA17]]
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_spmd_amenable_external
; AMDGPU-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
; AMDGPU: 3:
@@ -816,7 +816,7 @@ define weak void @spmd_and_non_spmd_callees_metadata(ptr %fp) #0 {
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]]
; NVPTX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA17]]
+; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_spmd_amenable_external
; NVPTX-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
; NVPTX: 3:
@@ -868,10 +868,10 @@ define void @__omp_outlined_spmd_amenable_external(ptr noalias %.global_tid., pt
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR6]]
; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA17]]
+; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr undef, i64 0)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
+; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined_spmd_amenable_external
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
@@ -885,10 +885,10 @@ define void @__omp_outlined_spmd_amenable_external(ptr noalias %.global_tid., pt
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR6]]
; NVPTX-NEXT: ret void
; NVPTX: for.body:
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA17]]
+; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr undef, i64 0)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
+; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
entry:
br label %for.cond
@@ -1107,7 +1107,7 @@ attributes #11 = { convergent }
!31 = !{ptr @__omp_outlined_spmd_amenable_external, ptr @__omp_outlined_not_spmd_amenable}
!32 = !{ptr @__omp_outlined_spmd_amenable_external, ptr @__omp_outlined_not_spmd_amenable_external}
;.
-; AMDGPU: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
+; AMDGPU: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" "nvvm.kernel" }
; AMDGPU: attributes #[[ATTR1]] = { norecurse }
; AMDGPU: attributes #[[ATTR2]] = { convergent norecurse nounwind }
; AMDGPU: attributes #[[ATTR3]] = { norecurse nounwind }
@@ -1120,7 +1120,7 @@ attributes #11 = { convergent }
; AMDGPU: attributes #[[ATTR10]] = { nounwind }
; AMDGPU: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
;.
-; NVPTX: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
+; NVPTX: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" "nvvm.kernel" }
; NVPTX: attributes #[[ATTR1]] = { norecurse }
; NVPTX: attributes #[[ATTR2]] = { convergent norecurse nounwind }
; NVPTX: attributes #[[ATTR3]] = { norecurse nounwind }
@@ -1139,29 +1139,24 @@ attributes #11 = { convergent }
; AMDGPU: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; AMDGPU: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; AMDGPU: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
-; AMDGPU: [[META6:![0-9]+]] = !{ptr @spmd_callees, !"kernel", i32 1}
-; AMDGPU: [[META7:![0-9]+]] = !{ptr @spmd_and_non_spmd_callees_metadata, !"kernel", i32 1}
-; AMDGPU: [[META8:![0-9]+]] = !{ptr @spmd_and_non_spmd_callee, !"kernel", i32 1}
-; AMDGPU: [[META9:![0-9]+]] = !{ptr @spmd_callees_metadata, !"kernel", i32 1}
-; AMDGPU: [[META10:![0-9]+]] = !{i32 1}
-; AMDGPU: [[META11:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; AMDGPU: [[META12:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; AMDGPU: [[META13:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; AMDGPU: [[META14:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; AMDGPU: [[META15:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; AMDGPU: [[META16:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-; AMDGPU: [[TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0}
-; AMDGPU: [[META18]] = !{!"int", [[META19:![0-9]+]], i64 0}
-; AMDGPU: [[META19]] = !{!"omnipotent char", [[META20:![0-9]+]], i64 0}
-; AMDGPU: [[META20]] = !{!"Simple C/C++ TBAA"}
-; AMDGPU: [[LOOP21]] = distinct !{[[LOOP21]], [[META22:![0-9]+]], [[META23:![0-9]+]]}
-; AMDGPU: [[META22]] = !{!"llvm.loop.mustprogress"}
-; AMDGPU: [[META23]] = !{!"llvm.loop.unroll.disable"}
-; AMDGPU: [[LOOP24]] = distinct !{[[LOOP24]], [[META22]], [[META23]]}
-; AMDGPU: [[TBAA25]] = !{[[META26:![0-9]+]], [[META26]], i64 0}
-; AMDGPU: [[META26]] = !{!"any pointer", [[META19]], i64 0}
-; AMDGPU: [[LOOP27]] = distinct !{[[LOOP27]], [[META22]], [[META23]]}
-; AMDGPU: [[LOOP28]] = distinct !{[[LOOP28]], [[META22]], [[META23]]}
+; AMDGPU: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; AMDGPU: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; AMDGPU: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; AMDGPU: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
+; AMDGPU: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
+; AMDGPU: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+; AMDGPU: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
+; AMDGPU: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
+; AMDGPU: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
+; AMDGPU: [[META15]] = !{!"Simple C/C++ TBAA"}
+; AMDGPU: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
+; AMDGPU: [[META17]] = !{!"llvm.loop.mustprogress"}
+; AMDGPU: [[META18]] = !{!"llvm.loop.unroll.disable"}
+; AMDGPU: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
+; AMDGPU: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
+; AMDGPU: [[META21]] = !{!"any pointer", [[META14]], i64 0}
+; AMDGPU: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
+; AMDGPU: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
;.
; NVPTX: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"", i32 74, i32 5}
; NVPTX: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
@@ -1169,27 +1164,22 @@ attributes #11 = { convergent }
; NVPTX: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; NVPTX: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; NVPTX: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
-; NVPTX: [[META6:![0-9]+]] = !{ptr @spmd_callees, !"kernel", i32 1}
-; NVPTX: [[META7:![0-9]+]] = !{ptr @spmd_and_non_spmd_callees_metadata, !"kernel", i32 1}
-; NVPTX: [[META8:![0-9]+]] = !{ptr @spmd_and_non_spmd_callee, !"kernel", i32 1}
-; NVPTX: [[META9:![0-9]+]] = !{ptr @spmd_callees_metadata, !"kernel", i32 1}
-; NVPTX: [[META10:![0-9]+]] = !{i32 1}
-; NVPTX: [[META11:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; NVPTX: [[META12:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; NVPTX: [[META13:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; NVPTX: [[META14:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; NVPTX: [[META15:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; NVPTX: [[META16:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-; NVPTX: [[TBAA17]] = !{[[META18:![0-9]+]], [[META18]], i64 0}
-; NVPTX: [[META18]] = !{!"int", [[META19:![0-9]+]], i64 0}
-; NVPTX: [[META19]] = !{!"omnipotent char", [[META20:![0-9]+]], i64 0}
-; NVPTX: [[META20]] = !{!"Simple C/C++ TBAA"}
-; NVPTX: [[LOOP21]] = distinct !{[[LOOP21]], [[META22:![0-9]+]], [[META23:![0-9]+]]}
-; NVPTX: [[META22]] = !{!"llvm.loop.mustprogress"}
-; NVPTX: [[META23]] = !{!"llvm.loop.unroll.disable"}
-; NVPTX: [[LOOP24]] = distinct !{[[LOOP24]], [[META22]], [[META23]]}
-; NVPTX: [[TBAA25]] = !{[[META26:![0-9]+]], [[META26]], i64 0}
-; NVPTX: [[META26]] = !{!"any pointer", [[META19]], i64 0}
-; NVPTX: [[LOOP27]] = distinct !{[[LOOP27]], [[META22]], [[META23]]}
-; NVPTX: [[LOOP28]] = distinct !{[[LOOP28]], [[META22]], [[META23]]}
+; NVPTX: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; NVPTX: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; NVPTX: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; NVPTX: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
+; NVPTX: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
+; NVPTX: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+; NVPTX: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
+; NVPTX: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
+; NVPTX: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
+; NVPTX: [[META15]] = !{!"Simple C/C++ TBAA"}
+; NVPTX: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
+; NVPTX: [[META17]] = !{!"llvm.loop.mustprogress"}
+; NVPTX: [[META18]] = !{!"llvm.loop.unroll.disable"}
+; NVPTX: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
+; NVPTX: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
+; NVPTX: [[META21]] = !{!"any pointer", [[META14]], i64 0}
+; NVPTX: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
+; NVPTX: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
;.
diff --git a/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll b/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll
index f28f61e053275c..96d2bf60388b01 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll
@@ -412,7 +412,7 @@ attributes #5 = { convergent }
!8 = !{i32 7, !"frame-pointer", i32 2}
!9 = !{!"clang version 14.0.0"}
;.
-; CHECK: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="all" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
+; CHECK: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="all" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "nvvm.kernel" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
; CHECK: attributes #[[ATTR1]] = { noinline nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
; CHECK: attributes #[[ATTR2]] = { norecurse nosync memory(write) }
; CHECK: attributes #[[ATTR3]] = { noinline norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
@@ -422,7 +422,7 @@ attributes #5 = { convergent }
; CHECK: attributes #[[ATTR7]] = { convergent nounwind }
; CHECK: attributes #[[ATTR8]] = { nosync nounwind memory(write) }
;.
-; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="all" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
+; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="all" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "nvvm.kernel" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR1]] = { noinline nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR2]] = { norecurse nosync memory(write) }
; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR3]] = { noinline norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
@@ -434,23 +434,19 @@ attributes #5 = { convergent }
;.
; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"spmd", i32 12, i32 0}
; CHECK: [[META1:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"generic", i32 20, i32 1}
-; CHECK: [[META2:![0-9]+]] = !{ptr @__omp_offloading_2b_10393b5_spmd_l12, !"kernel", i32 1}
-; CHECK: [[META3:![0-9]+]] = !{ptr @__omp_offloading_2b_10393b5_generic_l20, !"kernel", i32 1}
-; CHECK: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; CHECK: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; CHECK: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK: [[META7:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; CHECK: [[META8:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; CHECK: [[META9:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+; CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; CHECK: [[META5:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
+; CHECK: [[META6:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
+; CHECK: [[META7:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
;.
; CHECK-DISABLE-SPMDIZATION: [[META0:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"spmd", i32 12, i32 0}
; CHECK-DISABLE-SPMDIZATION: [[META1:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"generic", i32 20, i32 1}
-; CHECK-DISABLE-SPMDIZATION: [[META2:![0-9]+]] = !{ptr @__omp_offloading_2b_10393b5_spmd_l12, !"kernel", i32 1}
-; CHECK-DISABLE-SPMDIZATION: [[META3:![0-9]+]] = !{ptr @__omp_offloading_2b_10393b5_generic_l20, !"kernel", i32 1}
-; CHECK-DISABLE-SPMDIZATION: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; CHECK-DISABLE-SPMDIZATION: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; CHECK-DISABLE-SPMDIZATION: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK-DISABLE-SPMDIZATION: [[META7:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; CHECK-DISABLE-SPMDIZATION: [[META8:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; CHECK-DISABLE-SPMDIZATION: [[META9:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+; CHECK-DISABLE-SPMDIZATION: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; CHECK-DISABLE-SPMDIZATION: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; CHECK-DISABLE-SPMDIZATION: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; CHECK-DISABLE-SPMDIZATION: [[META5:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
+; CHECK-DISABLE-SPMDIZATION: [[META6:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
+; CHECK-DISABLE-SPMDIZATION: [[META7:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
;.
diff --git a/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
index 5e2abbae1811c6..8026ac41d3b44d 100644
--- a/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
+++ b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
@@ -853,8 +853,8 @@ declare void @llvm.assume(i1)
!20 = !{ptr @kernel_unknown_and_not_aligned1, !"kernel", i32 1}
;.
-; TUNIT: attributes #[[ATTR0]] = { norecurse "kernel" }
-; TUNIT: attributes #[[ATTR1]] = { "kernel" }
+; TUNIT: attributes #[[ATTR0]] = { norecurse "kernel" "nvvm.kernel" }
+; TUNIT: attributes #[[ATTR1]] = { "kernel" "nvvm.kernel" }
; TUNIT: attributes #[[ATTR2:[0-9]+]] = { nocallback norecurse nounwind "llvm.assume"="ompx_aligned_barrier" }
; TUNIT: attributes #[[ATTR3:[0-9]+]] = { nocallback norecurse nosync nounwind }
; TUNIT: attributes #[[ATTR4:[0-9]+]] = { nocallback }
@@ -862,8 +862,8 @@ declare void @llvm.assume(i1)
; TUNIT: attributes #[[ATTR6]] = { nounwind "llvm.assume"="ompx_aligned_barrier" }
; TUNIT: attributes #[[ATTR7]] = { nounwind }
;.
-; CGSCC: attributes #[[ATTR0]] = { norecurse "kernel" }
-; CGSCC: attributes #[[ATTR1]] = { "kernel" }
+; CGSCC: attributes #[[ATTR0]] = { norecurse "kernel" "nvvm.kernel" }
+; CGSCC: attributes #[[ATTR1]] = { "kernel" "nvvm.kernel" }
; CGSCC: attributes #[[ATTR2:[0-9]+]] = { nocallback norecurse nounwind "llvm.assume"="ompx_aligned_barrier" }
; CGSCC: attributes #[[ATTR3:[0-9]+]] = { nocallback norecurse nosync nounwind }
; CGSCC: attributes #[[ATTR4:[0-9]+]] = { nocallback }
@@ -872,45 +872,7 @@ declare void @llvm.assume(i1)
;.
; TUNIT: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; TUNIT: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; TUNIT: [[META2:![0-9]+]] = !{ptr @kernel, !"kernel", i32 1}
-; TUNIT: [[META3:![0-9]+]] = !{ptr @kernel2, !"kernel", i32 1}
-; TUNIT: [[META4:![0-9]+]] = !{ptr @kernel3, !"kernel", i32 1}
-; TUNIT: [[META5:![0-9]+]] = !{ptr @kernel4a1, !"kernel", i32 1}
-; TUNIT: [[META6:![0-9]+]] = !{ptr @kernel4b1, !"kernel", i32 1}
-; TUNIT: [[META7:![0-9]+]] = !{ptr @kernel4a2, !"kernel", i32 1}
-; TUNIT: [[META8:![0-9]+]] = !{ptr @kernel4b2, !"kernel", i32 1}
-; TUNIT: [[META9:![0-9]+]] = !{ptr @kernel4a3, !"kernel", i32 1}
-; TUNIT: [[META10:![0-9]+]] = !{ptr @kernel4b3, !"kernel", i32 1}
-; TUNIT: [[META11:![0-9]+]] = !{ptr @kernel4c1, !"kernel", i32 1}
-; TUNIT: [[META12:![0-9]+]] = !{ptr @kernel4d1, !"kernel", i32 1}
-; TUNIT: [[META13:![0-9]+]] = !{ptr @kernel4c2, !"kernel", i32 1}
-; TUNIT: [[META14:![0-9]+]] = !{ptr @kernel4d2, !"kernel", i32 1}
-; TUNIT: [[META15:![0-9]+]] = !{ptr @kernel4c3, !"kernel", i32 1}
-; TUNIT: [[META16:![0-9]+]] = !{ptr @kernel4d3, !"kernel", i32 1}
-; TUNIT: [[META17:![0-9]+]] = !{ptr @kernel_unknown_and_aligned1, !"kernel", i32 1}
-; TUNIT: [[META18:![0-9]+]] = !{ptr @kernel_unknown_and_aligned2, !"kernel", i32 1}
-; TUNIT: [[META19:![0-9]+]] = !{ptr @kernel_unknown_and_aligned3, !"kernel", i32 1}
-; TUNIT: [[META20:![0-9]+]] = !{ptr @kernel_unknown_and_not_aligned1, !"kernel", i32 1}
;.
; CGSCC: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CGSCC: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CGSCC: [[META2:![0-9]+]] = !{ptr @kernel, !"kernel", i32 1}
-; CGSCC: [[META3:![0-9]+]] = !{ptr @kernel2, !"kernel", i32 1}
-; CGSCC: [[META4:![0-9]+]] = !{ptr @kernel3, !"kernel", i32 1}
-; CGSCC: [[META5:![0-9]+]] = !{ptr @kernel4a1, !"kernel", i32 1}
-; CGSCC: [[META6:![0-9]+]] = !{ptr @kernel4b1, !"kernel", i32 1}
-; CGSCC: [[META7:![0-9]+]] = !{ptr @kernel4a2, !"kernel", i32 1}
-; CGSCC: [[META8:![0-9]+]] = !{ptr @kernel4b2, !"kernel", i32 1}
-; CGSCC: [[META9:![0-9]+]] = !{ptr @kernel4a3, !"kernel", i32 1}
-; CGSCC: [[META10:![0-9]+]] = !{ptr @kernel4b3, !"kernel", i32 1}
-; CGSCC: [[META11:![0-9]+]] = !{ptr @kernel4c1, !"kernel", i32 1}
-; CGSCC: [[META12:![0-9]+]] = !{ptr @kernel4d1, !"kernel", i32 1}
-; CGSCC: [[META13:![0-9]+]] = !{ptr @kernel4c2, !"kernel", i32 1}
-; CGSCC: [[META14:![0-9]+]] = !{ptr @kernel4d2, !"kernel", i32 1}
-; CGSCC: [[META15:![0-9]+]] = !{ptr @kernel4c3, !"kernel", i32 1}
-; CGSCC: [[META16:![0-9]+]] = !{ptr @kernel4d3, !"kernel", i32 1}
-; CGSCC: [[META17:![0-9]+]] = !{ptr @kernel_unknown_and_aligned1, !"kernel", i32 1}
-; CGSCC: [[META18:![0-9]+]] = !{ptr @kernel_unknown_and_aligned2, !"kernel", i32 1}
-; CGSCC: [[META19:![0-9]+]] = !{ptr @kernel_unknown_and_aligned3, !"kernel", i32 1}
-; CGSCC: [[META20:![0-9]+]] = !{ptr @kernel_unknown_and_not_aligned1, !"kernel", i32 1}
;.
>From 416d908b81fd25eb5a11f726cd581e693036d460 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Tue, 10 Dec 2024 20:28:42 +0000
Subject: [PATCH 5/5] address comments
---
clang/test/CodeGenCUDA/ptx-kernels.cu | 3 ++-
clang/test/CodeGenCUDA/usual-deallocators.cu | 3 ++-
clang/test/CodeGenOpenCL/ptx-calls.cl | 3 ++-
clang/test/CodeGenOpenCL/ptx-kernels.cl | 3 ++-
llvm/lib/IR/AutoUpgrade.cpp | 23 ++++++++++++--------
llvm/lib/Target/NVPTX/NVPTXUtilities.cpp | 10 ++++-----
llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 1 -
7 files changed, 26 insertions(+), 20 deletions(-)
diff --git a/clang/test/CodeGenCUDA/ptx-kernels.cu b/clang/test/CodeGenCUDA/ptx-kernels.cu
index 0c838662f82bee..e5c841f1e209b6 100644
--- a/clang/test/CodeGenCUDA/ptx-kernels.cu
+++ b/clang/test/CodeGenCUDA/ptx-kernels.cu
@@ -10,7 +10,8 @@
extern "C"
__device__ void device_function() {}
-// CHECK: define{{.*}} void @global_function{{.*}} #[[ATTR0:[0-9]+]]
+// CHECK-LABEL: define{{.*}} void @global_function
+// CHECK-SAME: #[[ATTR0:[0-9]+]]
extern "C"
__global__ void global_function() {
// CHECK: call void @device_function
diff --git a/clang/test/CodeGenCUDA/usual-deallocators.cu b/clang/test/CodeGenCUDA/usual-deallocators.cu
index 21616b19ae135a..b1a6dcb1d74865 100644
--- a/clang/test/CodeGenCUDA/usual-deallocators.cu
+++ b/clang/test/CodeGenCUDA/usual-deallocators.cu
@@ -109,7 +109,8 @@ __host__ __device__ void tests_hd(void *t) {
}
// Make sure that we've generated the kernel used by A::~A.
-// DEVICE: define void @_Z1fIiEvT_{{.*}} #[[ATTR0:[0-9]+]]
+// DEVICE-LABEL: define void @_Z1fIiEvT_
+// DEVICE-SAME: #[[ATTR0:[0-9]+]]
// Make sure we've picked deallocator for the correct side of compilation.
diff --git a/clang/test/CodeGenOpenCL/ptx-calls.cl b/clang/test/CodeGenOpenCL/ptx-calls.cl
index c914db87572cee..71c5bd598ab5ca 100644
--- a/clang/test/CodeGenOpenCL/ptx-calls.cl
+++ b/clang/test/CodeGenOpenCL/ptx-calls.cl
@@ -7,7 +7,8 @@ void device_function() {
__kernel void kernel_function() {
device_function();
}
-// CHECK: define{{.*}} spir_kernel void @kernel_function() #[[ATTR0:[0-9]+]]
+// CHECK-LABEL: define{{.*}} spir_kernel void @kernel_function()
+// CHECK-SAME: #[[ATTR0:[0-9]+]]
// CHECK: call void @device_function()
// CHECK: attributes #[[ATTR0]] = {{{.*}} "nvvm.kernel" {{.*}}}
diff --git a/clang/test/CodeGenOpenCL/ptx-kernels.cl b/clang/test/CodeGenOpenCL/ptx-kernels.cl
index 8b8fc0bab10c88..f4bd3f0d762be1 100644
--- a/clang/test/CodeGenOpenCL/ptx-kernels.cl
+++ b/clang/test/CodeGenOpenCL/ptx-kernels.cl
@@ -6,6 +6,7 @@ void device_function() {
__kernel void kernel_function() {
}
-// CHECK: define{{.*}} spir_kernel void @kernel_function() {{.*}} #[[ATTR0:[0-9]+]]
+// CHECK-LABEL: define{{.*}} spir_kernel void @kernel_function()
+// CHECK-SAME: #[[ATTR0:[0-9]+]]
// CHECK: attributes #[[ATTR0]] = {{{.*}} "nvvm.kernel" {{.*}}}
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index d774606bac9448..2487668715fe4f 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -5025,14 +5025,19 @@ bool llvm::UpgradeDebugInfo(Module &M) {
bool static upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K,
const Metadata *V) {
if (K == "kernel") {
- assert(mdconst::extract<ConstantInt>(V)->getZExtValue() == 1);
- cast<Function>(GV)->addFnAttr("nvvm.kernel");
+ if (!mdconst::extract<ConstantInt>(V)->isZero())
+ cast<Function>(GV)->addFnAttr("nvvm.kernel");
return true;
}
if (K == "align") {
- const uint64_t AlignBits = mdconst::extract<ConstantInt>(V)->getZExtValue();
- const unsigned Idx = (AlignBits >> 16);
- const Align StackAlign = Align(AlignBits & 0xFFFF);
+ // V is a bitfeild specifying two 16-bit values. The alignment value is
+ // specfied in low 16-bits, The index is specified in the high bits. For the
+ // index, 0 indicates the return value while higher values correspond to
+ // each parameter (idx = param + 1).
+ const uint64_t AlignIdxValuePair =
+ mdconst::extract<ConstantInt>(V)->getZExtValue();
+ const unsigned Idx = (AlignIdxValuePair >> 16);
+ const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
// TODO: Skip adding the stackalign attribute for returns, for now.
if (!Idx)
return false;
@@ -5063,7 +5068,9 @@ void llvm::UpgradeNVVMAnnotations(Module &M) {
assert(MD && "Invalid MDNode for annotation");
assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
- SmallVector<Metadata *, 8> NewOperands;
+ SmallVector<Metadata *, 8> NewOperands{MD->getOperand(0)};
+ // Each nvvm.annotations metadata entry will be of the following form:
+ // !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
// start index = 1, to skip the global variable key
// increment = 2, to skip the value for each property-value pairs
for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
@@ -5074,10 +5081,8 @@ void llvm::UpgradeNVVMAnnotations(Module &M) {
NewOperands.append({K, V});
}
- if (!NewOperands.empty()) {
- NewOperands.insert(NewOperands.begin(), MD->getOperand(0));
+ if (NewOperands.size() > 1)
NewNodes.push_back(MDNode::get(M.getContext(), NewOperands));
- }
}
NamedMD->clearOperands();
diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
index 4364074dc01349..ecab143662e685 100644
--- a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -330,12 +330,10 @@ MaybeAlign getAlign(const Function &F, unsigned Index) {
// not support stackalign attribute for this.
if (Index == 0) {
std::vector<unsigned> Vs;
- bool retval = findAllNVVMAnnotation(&F, "align", Vs);
- if (!retval)
- return std::nullopt;
- for (unsigned V : Vs)
- if ((V >> 16) == Index)
- return Align(V & 0xFFFF);
+ if (findAllNVVMAnnotation(&F, "align", Vs))
+ for (unsigned V : Vs)
+ if ((V >> 16) == Index)
+ return Align(V & 0xFFFF);
}
return std::nullopt;
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index c85d872f0c7cd5..1ef1adcc84c17a 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -5916,7 +5916,6 @@ KernelSet llvm::omp::getDeviceKernels(Module &M) {
for (auto &F : M) {
// TODO: unify this check with isKernelFunction in NVPTXUtilities.
if (F.hasFnAttribute("nvvm.kernel")) {
-
// We are only interested in OpenMP target regions. Others, such as
// kernels generated by CUDA but linked together, are not interesting to
// this pass.
More information about the llvm-commits
mailing list