[Mlir-commits] [clang] [llvm] [mlir] [NVPTX] Switch front-ends and tests to ptx_kernel cc (PR #120806)
Alex MacLean
llvmlistbot at llvm.org
Fri Dec 20 15:45:24 PST 2024
https://github.com/AlexMaclean updated https://github.com/llvm/llvm-project/pull/120806
>From 2c07700dd37b9e25cc04abe7dd50e1dc57ca8411 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Fri, 20 Dec 2024 23:09:28 +0000
Subject: [PATCH] [NVPTX] Switch front-ends and tests to ptx_kernel cc
---
clang/lib/CodeGen/Targets/NVPTX.cpp | 39 +++--
clang/test/CodeGen/nvptx_attributes.c | 8 +-
clang/test/CodeGenCUDA/device-fun-linkage.cu | 8 +-
clang/test/CodeGenCUDA/grid-constant.cu | 8 +-
clang/test/CodeGenCUDA/offload_via_llvm.cu | 4 +-
clang/test/CodeGenCUDA/ptx-kernels.cu | 7 +-
clang/test/CodeGenCUDA/usual-deallocators.cu | 4 +-
clang/test/CodeGenOpenCL/ptx-calls.cl | 4 +-
clang/test/CodeGenOpenCL/ptx-kernels.cl | 4 +-
clang/test/CodeGenOpenCL/reflect.cl | 10 +-
clang/test/Headers/gpuintrin.c | 2 +-
.../Target/NVPTX/NVPTXCtorDtorLowering.cpp | 18 +--
llvm/lib/Target/NVPTX/NVPTXUtilities.cpp | 6 +-
.../UniformityAnalysis/NVPTX/daorder.ll | 5 +-
.../UniformityAnalysis/NVPTX/diverge.ll | 16 +-
.../NVPTX/hidden_diverge.ll | 5 +-
.../UniformityAnalysis/NVPTX/irreducible.ll | 4 +-
llvm/test/CodeGen/NVPTX/b52037.ll | 5 +-
llvm/test/CodeGen/NVPTX/bug21465.ll | 6 +-
llvm/test/CodeGen/NVPTX/bug22322.ll | 5 +-
llvm/test/CodeGen/NVPTX/bug26185.ll | 13 +-
.../CodeGen/NVPTX/call-with-alloca-buffer.ll | 6 +-
llvm/test/CodeGen/NVPTX/cluster-dim.ll | 7 +-
llvm/test/CodeGen/NVPTX/generic-to-nvvm.ll | 6 +-
llvm/test/CodeGen/NVPTX/i1-array-global.ll | 6 +-
llvm/test/CodeGen/NVPTX/i1-ext-load.ll | 6 +-
llvm/test/CodeGen/NVPTX/i1-global.ll | 6 +-
llvm/test/CodeGen/NVPTX/i1-param.ll | 6 +-
llvm/test/CodeGen/NVPTX/intr-range.ll | 18 +--
llvm/test/CodeGen/NVPTX/kernel-param-align.ll | 8 +-
.../NVPTX/load-with-non-coherent-cache.ll | 59 +++----
llvm/test/CodeGen/NVPTX/local-stack-frame.ll | 4 +-
llvm/test/CodeGen/NVPTX/lower-alloca.ll | 4 +-
.../CodeGen/NVPTX/lower-args-gridconstant.ll | 84 +++++-----
llvm/test/CodeGen/NVPTX/lower-args.ll | 13 +-
llvm/test/CodeGen/NVPTX/lower-byval-args.ll | 150 ++++++++----------
llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll | 6 +-
.../CodeGen/NVPTX/lower-kernel-ptr-arg.ll | 10 +-
llvm/test/CodeGen/NVPTX/maxclusterrank.ll | 5 +-
.../CodeGen/NVPTX/noduplicate-syncthreads.ll | 5 +-
llvm/test/CodeGen/NVPTX/noreturn.ll | 9 +-
llvm/test/CodeGen/NVPTX/nvcl-param-align.ll | 5 +-
llvm/test/CodeGen/NVPTX/refl1.ll | 6 +-
llvm/test/CodeGen/NVPTX/reg-copy.ll | 6 +-
llvm/test/CodeGen/NVPTX/simple-call.ll | 8 +-
llvm/test/CodeGen/NVPTX/surf-read-cuda.ll | 14 +-
llvm/test/CodeGen/NVPTX/surf-read.ll | 7 +-
llvm/test/CodeGen/NVPTX/surf-tex.py | 36 ++---
llvm/test/CodeGen/NVPTX/surf-write-cuda.ll | 10 +-
llvm/test/CodeGen/NVPTX/surf-write.ll | 7 +-
llvm/test/CodeGen/NVPTX/tex-read-cuda.ll | 13 +-
llvm/test/CodeGen/NVPTX/tex-read.ll | 5 +-
llvm/test/CodeGen/NVPTX/unreachable.ll | 5 +-
llvm/test/DebugInfo/NVPTX/debug-addr-class.ll | 4 +-
llvm/test/DebugInfo/NVPTX/debug-info.ll | 8 +-
.../LoopStrengthReduce/NVPTX/trunc.ll | 4 +-
.../NVPTX/speculative-slsr.ll | 6 +-
.../Dialect/NVVM/NVVMToLLVMIRTranslation.cpp | 10 +-
mlir/test/Target/LLVMIR/nvvmir.mlir | 29 ++--
59 files changed, 305 insertions(+), 477 deletions(-)
diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp
index 0431d2cc4ddc39..b82e4ddb9f3f2b 100644
--- a/clang/lib/CodeGen/Targets/NVPTX.cpp
+++ b/clang/lib/CodeGen/Targets/NVPTX.cpp
@@ -9,6 +9,7 @@
#include "ABIInfoImpl.h"
#include "TargetInfo.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/CallingConv.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
using namespace clang;
@@ -79,13 +80,11 @@ class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
// Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
// resulting MDNode to the nvvm.annotations MDNode.
static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
- int Operand,
- const SmallVectorImpl<int> &GridConstantArgs);
+ int Operand);
- static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
- int Operand) {
- addNVVMMetadata(GV, Name, Operand, SmallVector<int, 1>(0));
- }
+ static void
+ addGridConstantNVVMMetadata(llvm::GlobalValue *GV,
+ const SmallVectorImpl<int> &GridConstantArgs);
private:
static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
@@ -259,7 +258,7 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
if (FD->hasAttr<OpenCLKernelAttr>()) {
// OpenCL __kernel functions get kernel metadata
// Create !{<func-ref>, metadata !"kernel", i32 1} node
- addNVVMMetadata(F, "kernel", 1);
+ F->setCallingConv(llvm::CallingConv::PTX_Kernel);
// And kernel functions are not subject to inlining
F->addFnAttr(llvm::Attribute::NoInline);
}
@@ -277,7 +276,8 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
// For some reason arg indices are 1-based in NVVM
GCI.push_back(IV.index() + 1);
// Create !{<func-ref>, metadata !"kernel", i32 1} node
- addNVVMMetadata(F, "kernel", 1, GCI);
+ F->setCallingConv(llvm::CallingConv::PTX_Kernel);
+ addGridConstantNVVMMetadata(F, GCI);
}
if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
M.handleCUDALaunchBoundsAttr(F, Attr);
@@ -285,13 +285,12 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
// Attach kernel metadata directly if compiling for NVPTX.
if (FD->hasAttr<NVPTXKernelAttr>()) {
- addNVVMMetadata(F, "kernel", 1);
+ F->setCallingConv(llvm::CallingConv::PTX_Kernel);
}
}
-void NVPTXTargetCodeGenInfo::addNVVMMetadata(
- llvm::GlobalValue *GV, StringRef Name, int Operand,
- const SmallVectorImpl<int> &GridConstantArgs) {
+void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
+ StringRef Name, int Operand) {
llvm::Module *M = GV->getParent();
llvm::LLVMContext &Ctx = M->getContext();
@@ -302,6 +301,21 @@ void NVPTXTargetCodeGenInfo::addNVVMMetadata(
llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
+
+ // Append metadata to nvvm.annotations
+ MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
+}
+
+void NVPTXTargetCodeGenInfo::addGridConstantNVVMMetadata(
+ llvm::GlobalValue *GV, const SmallVectorImpl<int> &GridConstantArgs) {
+
+ llvm::Module *M = GV->getParent();
+ llvm::LLVMContext &Ctx = M->getContext();
+
+ // Get "nvvm.annotations" metadata node
+ llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
+
+ SmallVector<llvm::Metadata *, 5> MDVals = {llvm::ConstantAsMetadata::get(GV)};
if (!GridConstantArgs.empty()) {
SmallVector<llvm::Metadata *, 10> GCM;
for (int I : GridConstantArgs)
@@ -310,6 +324,7 @@ void NVPTXTargetCodeGenInfo::addNVVMMetadata(
MDVals.append({llvm::MDString::get(Ctx, "grid_constant"),
llvm::MDNode::get(Ctx, GCM)});
}
+
// Append metadata to nvvm.annotations
MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}
diff --git a/clang/test/CodeGen/nvptx_attributes.c b/clang/test/CodeGen/nvptx_attributes.c
index 7dbd9f1321e280..8b9f3a2c18a1df 100644
--- a/clang/test/CodeGen/nvptx_attributes.c
+++ b/clang/test/CodeGen/nvptx_attributes.c
@@ -10,8 +10,14 @@
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[RET_ADDR]], align 8
// CHECK-NEXT: store i32 1, ptr [[TMP0]], align 4
// CHECK-NEXT: ret void
+//
__attribute__((nvptx_kernel)) void foo(int *ret) {
*ret = 1;
}
-// CHECK: !0 = !{ptr @foo, !"kernel", i32 1}
+//.
+// CHECK: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx32,+sm_61" }
+//.
+// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+// CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+//.
diff --git a/clang/test/CodeGenCUDA/device-fun-linkage.cu b/clang/test/CodeGenCUDA/device-fun-linkage.cu
index 54899e0e9c0f16..bdac62d1d03e84 100644
--- a/clang/test/CodeGenCUDA/device-fun-linkage.cu
+++ b/clang/test/CodeGenCUDA/device-fun-linkage.cu
@@ -17,8 +17,8 @@ template __device__ void func<int>();
// RDC: define weak_odr void @_Z4funcIiEvv()
template __global__ void kernel<int>();
-// NORDC: define void @_Z6kernelIiEvv()
-// RDC: define weak_odr void @_Z6kernelIiEvv()
+// NORDC: define ptx_kernel void @_Z6kernelIiEvv()
+// RDC: define weak_odr ptx_kernel void @_Z6kernelIiEvv()
// Ensure that unused static device function is eliminated
static __device__ void static_func() {}
@@ -28,5 +28,5 @@ static __device__ void static_func() {}
// Ensure that kernel function has external or weak_odr
// linkage regardless static specifier
static __global__ void static_kernel() {}
-// NORDC: define void @_ZL13static_kernelv()
-// RDC: define weak_odr void @_ZL13static_kernelv[[FILEID:.*]]()
+// NORDC: define ptx_kernel void @_ZL13static_kernelv()
+// RDC: define weak_odr ptx_kernel void @_ZL13static_kernelv[[FILEID:.*]]()
diff --git a/clang/test/CodeGenCUDA/grid-constant.cu b/clang/test/CodeGenCUDA/grid-constant.cu
index 8d4be9c9dc7e1e..e7000cab3cda59 100644
--- a/clang/test/CodeGenCUDA/grid-constant.cu
+++ b/clang/test/CodeGenCUDA/grid-constant.cu
@@ -21,11 +21,11 @@ void foo() {
}
//.
//.
-// CHECK: [[META0:![0-9]+]] = !{ptr @_Z6kernel1Sii, !"kernel", i32 1, !"grid_constant", [[META1:![0-9]+]]}
+// CHECK: [[META0:![0-9]+]] = !{ptr @_Z6kernel1Sii, !"grid_constant", [[META1:![0-9]+]]}
// CHECK: [[META1]] = !{i32 1, i32 3}
-// CHECK: [[META2:![0-9]+]] = !{ptr @_Z13tkernel_constIK1SEvT_, !"kernel", i32 1, !"grid_constant", [[META3:![0-9]+]]}
+// CHECK: [[META2:![0-9]+]] = !{ptr @_Z13tkernel_constIK1SEvT_, !"grid_constant", [[META3:![0-9]+]]}
// CHECK: [[META3]] = !{i32 1}
-// CHECK: [[META4:![0-9]+]] = !{ptr @_Z13tkernel_constI1SEvT_, !"kernel", i32 1, !"grid_constant", [[META3]]}
-// CHECK: [[META5:![0-9]+]] = !{ptr @_Z7tkernelIK1SEviT_, !"kernel", i32 1, !"grid_constant", [[META6:![0-9]+]]}
+// CHECK: [[META4:![0-9]+]] = !{ptr @_Z13tkernel_constI1SEvT_, !"grid_constant", [[META3]]}
+// CHECK: [[META5:![0-9]+]] = !{ptr @_Z7tkernelIK1SEviT_, !"grid_constant", [[META6:![0-9]+]]}
// CHECK: [[META6]] = !{i32 2}
//.
diff --git a/clang/test/CodeGenCUDA/offload_via_llvm.cu b/clang/test/CodeGenCUDA/offload_via_llvm.cu
index 434eba99c1795d..62942d8dc07551 100644
--- a/clang/test/CodeGenCUDA/offload_via_llvm.cu
+++ b/clang/test/CodeGenCUDA/offload_via_llvm.cu
@@ -7,7 +7,7 @@
#define __OFFLOAD_VIA_LLVM__ 1
#include "Inputs/cuda.h"
-// HST-LABEL: define dso_local void @_Z18__device_stub__fooisPvS_(
+// HST-LABEL: define dso_local ptx_kernel void @_Z18__device_stub__fooisPvS_(
// HST-SAME: i32 noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], ptr noundef [[TMP2:%.*]], ptr noundef [[TMP3:%.*]]) #[[ATTR0:[0-9]+]] {
// HST-NEXT: [[ENTRY:.*:]]
// HST-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4
@@ -50,7 +50,7 @@
// HST: [[SETUP_END]]:
// HST-NEXT: ret void
//
-// DEV-LABEL: define dso_local void @_Z3fooisPvS_(
+// DEV-LABEL: define dso_local ptx_kernel void @_Z3fooisPvS_(
// DEV-SAME: i32 noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], ptr noundef [[TMP2:%.*]], ptr noundef [[TMP3:%.*]]) #[[ATTR0:[0-9]+]] {
// DEV-NEXT: [[ENTRY:.*:]]
// DEV-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4
diff --git a/clang/test/CodeGenCUDA/ptx-kernels.cu b/clang/test/CodeGenCUDA/ptx-kernels.cu
index b7172b77369296..a7d5e11bd496fb 100644
--- a/clang/test/CodeGenCUDA/ptx-kernels.cu
+++ b/clang/test/CodeGenCUDA/ptx-kernels.cu
@@ -10,7 +10,7 @@
extern "C"
__device__ void device_function() {}
-// CHECK-LABEL: define{{.*}} void @global_function
+// CHECK-LABEL: define{{.*}} ptx_kernel void @global_function
extern "C"
__global__ void global_function() {
// CHECK: call void @device_function
@@ -19,7 +19,7 @@ __global__ void global_function() {
// Make sure host-instantiated kernels are preserved on device side.
template <typename T> __global__ void templated_kernel(T param) {}
-// CHECK-DAG: define{{.*}} void @_Z16templated_kernelIiEvT_(
+// CHECK-DAG: define{{.*}} ptx_kernel void @_Z16templated_kernelIiEvT_(
namespace {
__global__ void anonymous_ns_kernel() {}
@@ -30,6 +30,3 @@ void host_function() {
templated_kernel<<<0, 0>>>(0);
anonymous_ns_kernel<<<0,0>>>();
}
-
-// CHECK: !{{[0-9]+}} = !{ptr @global_function, !"kernel", i32 1}
-// CHECK: !{{[0-9]+}} = !{ptr @_Z16templated_kernelIiEvT_, !"kernel", i32 1}
diff --git a/clang/test/CodeGenCUDA/usual-deallocators.cu b/clang/test/CodeGenCUDA/usual-deallocators.cu
index b85a706813fc2b..64560efb74135e 100644
--- a/clang/test/CodeGenCUDA/usual-deallocators.cu
+++ b/clang/test/CodeGenCUDA/usual-deallocators.cu
@@ -109,7 +109,7 @@ __host__ __device__ void tests_hd(void *t) {
}
// Make sure that we've generated the kernel used by A::~A.
-// DEVICE-LABEL: define void @_Z1fIiEvT_
+// DEVICE-LABEL: define ptx_kernel void @_Z1fIiEvT_
// Make sure we've picked deallocator for the correct side of compilation.
@@ -147,5 +147,3 @@ __host__ __device__ void tests_hd(void *t) {
// COMMON-LABEL: define linkonce_odr void @_ZN8H1H2D1D2dlEPv(ptr noundef %0)
// DEVICE: call void @dev_fn()
// HOST: call void @host_fn()
-
-// DEVICE: !0 = !{ptr @_Z1fIiEvT_, !"kernel", i32 1}
diff --git a/clang/test/CodeGenOpenCL/ptx-calls.cl b/clang/test/CodeGenOpenCL/ptx-calls.cl
index 0081152ae40e01..ae187173b1730c 100644
--- a/clang/test/CodeGenOpenCL/ptx-calls.cl
+++ b/clang/test/CodeGenOpenCL/ptx-calls.cl
@@ -7,7 +7,5 @@ void device_function() {
__kernel void kernel_function() {
device_function();
}
-// CHECK-LABEL: define{{.*}} spir_kernel void @kernel_function()
+// CHECK-LABEL: define{{.*}} ptx_kernel void @kernel_function()
// CHECK: call void @device_function()
-// CHECK: !{{[0-9]+}} = !{ptr @kernel_function, !"kernel", i32 1}
-
diff --git a/clang/test/CodeGenOpenCL/ptx-kernels.cl b/clang/test/CodeGenOpenCL/ptx-kernels.cl
index 210e5682ac721c..eac0df4abfbeaa 100644
--- a/clang/test/CodeGenOpenCL/ptx-kernels.cl
+++ b/clang/test/CodeGenOpenCL/ptx-kernels.cl
@@ -6,6 +6,4 @@ void device_function() {
__kernel void kernel_function() {
}
-// CHECK-LABEL: define{{.*}} spir_kernel void @kernel_function()
-
-// CHECK: !{{[0-9]+}} = !{ptr @kernel_function, !"kernel", i32 1}
+// CHECK-LABEL: define{{.*}} ptx_kernel void @kernel_function()
diff --git a/clang/test/CodeGenOpenCL/reflect.cl b/clang/test/CodeGenOpenCL/reflect.cl
index 9ae4a5f027d358..f5b618f6a35d37 100644
--- a/clang/test/CodeGenOpenCL/reflect.cl
+++ b/clang/test/CodeGenOpenCL/reflect.cl
@@ -12,8 +12,8 @@ bool device_function() {
return __nvvm_reflect("__CUDA_ARCH") >= 700;
}
-// CHECK-LABEL: define dso_local spir_kernel void @kernel_function(
-// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
+// CHECK-LABEL: define dso_local ptx_kernel void @kernel_function(
+// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6:![0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 4
// CHECK-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR]], align 4
@@ -26,3 +26,9 @@ bool device_function() {
__kernel void kernel_function(__global int *i) {
*i = device_function();
}
+//.
+// CHECK: [[META3]] = !{i32 1}
+// CHECK: [[META4]] = !{!"none"}
+// CHECK: [[META5]] = !{!"int*"}
+// CHECK: [[META6]] = !{!""}
+//.
diff --git a/clang/test/Headers/gpuintrin.c b/clang/test/Headers/gpuintrin.c
index 2e45f73692f534..281339716c3edf 100644
--- a/clang/test/Headers/gpuintrin.c
+++ b/clang/test/Headers/gpuintrin.c
@@ -44,7 +44,7 @@
// AMDGPU-NEXT: call void @__gpu_exit() #[[ATTR8:[0-9]+]]
// AMDGPU-NEXT: unreachable
//
-// NVPTX-LABEL: define protected void @foo(
+// NVPTX-LABEL: define protected ptx_kernel void @foo(
// NVPTX-SAME: ) #[[ATTR0:[0-9]+]] {
// NVPTX-NEXT: [[ENTRY:.*:]]
// NVPTX-NEXT: [[CALL:%.*]] = call i32 @__gpu_num_blocks_x() #[[ATTR6:[0-9]+]]
diff --git a/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp
index f940dc05948b3c..c03ef8d33220c1 100644
--- a/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp
@@ -14,6 +14,7 @@
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTX.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
@@ -49,39 +50,34 @@ static std::string getHash(StringRef Str) {
return llvm::utohexstr(Hash.low(), /*LowerCase=*/true);
}
-static void addKernelMetadata(Module &M, GlobalValue *GV) {
+static void addKernelMetadata(Module &M, Function *F) {
llvm::LLVMContext &Ctx = M.getContext();
// Get "nvvm.annotations" metadata node.
llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
- llvm::Metadata *KernelMDVals[] = {
- llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, "kernel"),
- llvm::ConstantAsMetadata::get(
- llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
-
// This kernel is only to be called single-threaded.
llvm::Metadata *ThreadXMDVals[] = {
- llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, "maxntidx"),
+ llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, "maxntidx"),
llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
llvm::Metadata *ThreadYMDVals[] = {
- llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, "maxntidy"),
+ llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, "maxntidy"),
llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
llvm::Metadata *ThreadZMDVals[] = {
- llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, "maxntidz"),
+ llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, "maxntidz"),
llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
llvm::Metadata *BlockMDVals[] = {
- llvm::ConstantAsMetadata::get(GV),
+ llvm::ConstantAsMetadata::get(F),
llvm::MDString::get(Ctx, "maxclusterrank"),
llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
// Append metadata to nvvm.annotations.
- MD->addOperand(llvm::MDNode::get(Ctx, KernelMDVals));
+ F->setCallingConv(CallingConv::PTX_Kernel);
MD->addOperand(llvm::MDNode::get(Ctx, ThreadXMDVals));
MD->addOperand(llvm::MDNode::get(Ctx, ThreadYMDVals));
MD->addOperand(llvm::MDNode::get(Ctx, ThreadZMDVals));
diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
index 98bffd92a087b6..0f2bec711b249d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -311,11 +311,13 @@ std::optional<unsigned> getMaxNReg(const Function &F) {
}
bool isKernelFunction(const Function &F) {
+ if (F.getCallingConv() == CallingConv::PTX_Kernel)
+ return true;
+
if (const auto X = findOneNVVMAnnotation(&F, "kernel"))
return (*X == 1);
- // There is no NVVM metadata, check the calling convention
- return F.getCallingConv() == CallingConv::PTX_Kernel;
+ return false;
}
MaybeAlign getAlign(const Function &F, unsigned Index) {
diff --git a/llvm/test/Analysis/UniformityAnalysis/NVPTX/daorder.ll b/llvm/test/Analysis/UniformityAnalysis/NVPTX/daorder.ll
index 89d8c5aa90ab1e..14f33d79b471d3 100644
--- a/llvm/test/Analysis/UniformityAnalysis/NVPTX/daorder.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/NVPTX/daorder.ll
@@ -3,7 +3,7 @@
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"
-define i32 @daorder(i32 %n) {
+define ptx_kernel i32 @daorder(i32 %n) {
; CHECK-LABEL: for function 'daorder'
entry:
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
@@ -43,6 +43,3 @@ declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
declare i32 @llvm.nvvm.read.ptx.sreg.laneid()
-
-!nvvm.annotations = !{!0}
-!0 = !{ptr @daorder, !"kernel", i32 1}
diff --git a/llvm/test/Analysis/UniformityAnalysis/NVPTX/diverge.ll b/llvm/test/Analysis/UniformityAnalysis/NVPTX/diverge.ll
index 0ac1b5f541471c..cf8ffadcd073cf 100644
--- a/llvm/test/Analysis/UniformityAnalysis/NVPTX/diverge.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/NVPTX/diverge.ll
@@ -4,7 +4,7 @@ target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"
; return (n < 0 ? a + threadIdx.x : b + threadIdx.x)
-define i32 @no_diverge(i32 %n, i32 %a, i32 %b) {
+define ptx_kernel i32 @no_diverge(i32 %n, i32 %a, i32 %b) {
; CHECK-LABEL: for function 'no_diverge'
entry:
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
@@ -27,7 +27,7 @@ merge:
; if (threadIdx.x < 5) // divergent: data dependent
; c = b;
; return c; // c is divergent: sync dependent
-define i32 @sync(i32 %a, i32 %b) {
+define ptx_kernel i32 @sync(i32 %a, i32 %b) {
; CHECK-LABEL: for function 'sync'
bb1:
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
@@ -49,7 +49,7 @@ bb3:
; }
; // c here is divergent because it is sync dependent on threadIdx.x >= 5
; return c;
-define i32 @mixed(i32 %n, i32 %a, i32 %b) {
+define ptx_kernel i32 @mixed(i32 %n, i32 %a, i32 %b) {
; CHECK-LABEL: for function 'mixed'
bb1:
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
@@ -101,7 +101,7 @@ merge:
; return i == 10 ? 0 : 1; // i here is divergent
;
; The i defined in the loop is used outside.
-define i32 @loop() {
+define ptx_kernel i32 @loop() {
; CHECK-LABEL: for function 'loop'
entry:
%laneid = call i32 @llvm.nvvm.read.ptx.sreg.laneid()
@@ -149,7 +149,7 @@ else:
}
; Verifies sync-dependence is computed correctly in the absense of loops.
-define i32 @sync_no_loop(i32 %arg) {
+define ptx_kernel i32 @sync_no_loop(i32 %arg) {
; CHECK-LABEL: for function 'sync_no_loop'
entry:
%0 = add i32 %arg, 1
@@ -174,9 +174,3 @@ declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
declare i32 @llvm.nvvm.read.ptx.sreg.laneid()
-!nvvm.annotations = !{!0, !1, !2, !3, !4}
-!0 = !{ptr @no_diverge, !"kernel", i32 1}
-!1 = !{ptr @sync, !"kernel", i32 1}
-!2 = !{ptr @mixed, !"kernel", i32 1}
-!3 = !{ptr @loop, !"kernel", i32 1}
-!4 = !{ptr @sync_no_loop, !"kernel", i32 1}
diff --git a/llvm/test/Analysis/UniformityAnalysis/NVPTX/hidden_diverge.ll b/llvm/test/Analysis/UniformityAnalysis/NVPTX/hidden_diverge.ll
index e319211771c0cd..65512bf572f83b 100644
--- a/llvm/test/Analysis/UniformityAnalysis/NVPTX/hidden_diverge.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/NVPTX/hidden_diverge.ll
@@ -3,7 +3,7 @@
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"
-define i32 @hidden_diverge(i32 %n, i32 %a, i32 %b) {
+define ptx_kernel i32 @hidden_diverge(i32 %n, i32 %a, i32 %b) {
; CHECK-LABEL: for function 'hidden_diverge'
entry:
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
@@ -27,6 +27,3 @@ merge:
}
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
-
-!nvvm.annotations = !{!0}
-!0 = !{ptr @hidden_diverge, !"kernel", i32 1}
diff --git a/llvm/test/Analysis/UniformityAnalysis/NVPTX/irreducible.ll b/llvm/test/Analysis/UniformityAnalysis/NVPTX/irreducible.ll
index cd729a918f8145..e1ecc69871b981 100644
--- a/llvm/test/Analysis/UniformityAnalysis/NVPTX/irreducible.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/NVPTX/irreducible.ll
@@ -23,7 +23,7 @@ target triple = "nvptx64-nvidia-cuda"
; V
; if (i3 == 5) // divergent
; because sync dependent on (tid / i3).
-define i32 @unstructured_loop(i1 %entry_cond) {
+define ptx_kernel i32 @unstructured_loop(i1 %entry_cond) {
; CHECK-LABEL: for function 'unstructured_loop'
entry:
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
@@ -59,5 +59,3 @@ declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
declare i32 @llvm.nvvm.read.ptx.sreg.laneid()
-!nvvm.annotations = !{!0}
-!0 = !{ptr @unstructured_loop, !"kernel", i32 1}
diff --git a/llvm/test/CodeGen/NVPTX/b52037.ll b/llvm/test/CodeGen/NVPTX/b52037.ll
index 5d1c390909f6a5..b6317dfb28597c 100644
--- a/llvm/test/CodeGen/NVPTX/b52037.ll
+++ b/llvm/test/CodeGen/NVPTX/b52037.ll
@@ -39,7 +39,7 @@ declare %int3 @hoge(i32, i32, i32) local_unnamed_addr
declare i64 @foo() local_unnamed_addr
-define void @barney(ptr nocapture readonly %arg) local_unnamed_addr {
+define ptx_kernel void @barney(ptr nocapture readonly %arg) local_unnamed_addr {
bb:
tail call void asm sideeffect "// KEEP", ""() #1
%tmp = alloca %struct.zot, align 16
@@ -210,9 +210,6 @@ bb14: ; preds = %bb49.i.lr.ph, %bb49
attributes #0 = { argmemonly mustprogress nofree nounwind willreturn }
attributes #1 = { nounwind }
-!nvvm.annotations = !{!0}
-
-!0 = !{ptr @barney, !"kernel", i32 1}
!1 = !{!2, !11, i64 64}
!2 = !{!"_ZTSN7cuneibs22neiblist_iterator_coreE", !3, i64 0, !3, i64 8, !6, i64 16, !8, i64 32, !9, i64 44, !10, i64 48, !11, i64 64, !9, i64 72, !4, i64 76, !9, i64 80}
!3 = !{!"any pointer", !4, i64 0}
diff --git a/llvm/test/CodeGen/NVPTX/bug21465.ll b/llvm/test/CodeGen/NVPTX/bug21465.ll
index 9b1f1049c64879..76300e3cfdc5b3 100644
--- a/llvm/test/CodeGen/NVPTX/bug21465.ll
+++ b/llvm/test/CodeGen/NVPTX/bug21465.ll
@@ -8,7 +8,7 @@ target triple = "nvptx64-unknown-unknown"
%struct.S = type { i32, i32 }
; Function Attrs: nounwind
-define void @_Z11TakesStruct1SPi(ptr byval(%struct.S) nocapture readonly %input, ptr nocapture %output) #0 {
+define ptx_kernel void @_Z11TakesStruct1SPi(ptr byval(%struct.S) nocapture readonly %input, ptr nocapture %output) #0 {
entry:
; CHECK-LABEL: @_Z11TakesStruct1SPi
; PTX-LABEL: .visible .entry _Z11TakesStruct1SPi(
@@ -23,7 +23,3 @@ entry:
}
attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
-!nvvm.annotations = !{!0}
-
-!0 = !{ptr @_Z11TakesStruct1SPi, !"kernel", i32 1}
diff --git a/llvm/test/CodeGen/NVPTX/bug22322.ll b/llvm/test/CodeGen/NVPTX/bug22322.ll
index e3656fd16b2158..ace31667184b0e 100644
--- a/llvm/test/CodeGen/NVPTX/bug22322.ll
+++ b/llvm/test/CodeGen/NVPTX/bug22322.ll
@@ -8,7 +8,7 @@ target triple = "nvptx64-nvidia-cuda"
; Function Attrs: nounwind
; CHECK-LABEL: some_kernel
-define void @some_kernel(ptr nocapture %dst) #0 {
+define ptx_kernel void @some_kernel(ptr nocapture %dst) #0 {
_ZL11compute_vecRK6float3jb.exit:
%ret_vec.sroa.8.i = alloca float, align 4
%0 = tail call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
@@ -55,8 +55,5 @@ attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "n
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind }
-!nvvm.annotations = !{!0}
!llvm.ident = !{!1}
-
-!0 = !{ptr @some_kernel, !"kernel", i32 1}
!1 = !{!"clang version 3.5.1 (tags/RELEASE_351/final)"}
diff --git a/llvm/test/CodeGen/NVPTX/bug26185.ll b/llvm/test/CodeGen/NVPTX/bug26185.ll
index 00c97fb381e0e2..193df7f86ca724 100644
--- a/llvm/test/CodeGen/NVPTX/bug26185.ll
+++ b/llvm/test/CodeGen/NVPTX/bug26185.ll
@@ -8,7 +8,7 @@ target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-unknown-unknown"
; CHECK-LABEL: ex_zext
-define void @ex_zext(ptr noalias readonly %data, ptr %res) {
+define ptx_kernel void @ex_zext(ptr noalias readonly %data, ptr %res) {
entry:
; CHECK: ld.global.nc.u8
%val = load i8, ptr %data
@@ -19,7 +19,7 @@ entry:
}
; CHECK-LABEL: ex_sext
-define void @ex_sext(ptr noalias readonly %data, ptr %res) {
+define ptx_kernel void @ex_sext(ptr noalias readonly %data, ptr %res) {
entry:
; CHECK: ld.global.nc.u8
%val = load i8, ptr %data
@@ -30,7 +30,7 @@ entry:
}
; CHECK-LABEL: ex_zext_v2
-define void @ex_zext_v2(ptr noalias readonly %data, ptr %res) {
+define ptx_kernel void @ex_zext_v2(ptr noalias readonly %data, ptr %res) {
entry:
; CHECK: ld.global.nc.v2.u8
%val = load <2 x i8>, ptr %data
@@ -41,7 +41,7 @@ entry:
}
; CHECK-LABEL: ex_sext_v2
-define void @ex_sext_v2(ptr noalias readonly %data, ptr %res) {
+define ptx_kernel void @ex_sext_v2(ptr noalias readonly %data, ptr %res) {
entry:
; CHECK: ld.global.nc.v2.u8
%val = load <2 x i8>, ptr %data
@@ -51,8 +51,3 @@ entry:
ret void
}
-!nvvm.annotations = !{!0,!1,!2,!3}
-!0 = !{ptr @ex_zext, !"kernel", i32 1}
-!1 = !{ptr @ex_sext, !"kernel", i32 1}
-!2 = !{ptr @ex_zext_v2, !"kernel", i32 1}
-!3 = !{ptr @ex_sext_v2, !"kernel", i32 1}
diff --git a/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll b/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
index 19f4ef8ec77b91..1c9d271902fd3f 100644
--- a/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
+++ b/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
@@ -16,7 +16,7 @@
; }
; CHECK: .visible .entry kernel_func
-define void @kernel_func(ptr %a) {
+define ptx_kernel void @kernel_func(ptr %a) {
entry:
%buf = alloca [16 x i8], align 4
@@ -56,7 +56,3 @@ entry:
}
declare void @callee(ptr, ptr)
-
-!nvvm.annotations = !{!0}
-
-!0 = !{ptr @kernel_func, !"kernel", i32 1}
diff --git a/llvm/test/CodeGen/NVPTX/cluster-dim.ll b/llvm/test/CodeGen/NVPTX/cluster-dim.ll
index c9258addbe04d5..9275c895b224af 100644
--- a/llvm/test/CodeGen/NVPTX/cluster-dim.ll
+++ b/llvm/test/CodeGen/NVPTX/cluster-dim.ll
@@ -3,7 +3,7 @@
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 | FileCheck -check-prefixes=CHECK90 %s
; RUN: %if ptxas-12.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 | %ptxas-verify -arch=sm_90 %}
-define void @kernel_func_clusterxyz() {
+define ptx_kernel void @kernel_func_clusterxyz() {
; CHECK80-LABEL: kernel_func_clusterxyz(
; CHECK80: {
; CHECK80-EMPTY:
@@ -23,7 +23,6 @@ define void @kernel_func_clusterxyz() {
}
-!nvvm.annotations = !{!1, !2}
+!nvvm.annotations = !{!1}
-!1 = !{ptr @kernel_func_clusterxyz, !"kernel", i32 1}
-!2 = !{ptr @kernel_func_clusterxyz, !"cluster_dim_x", i32 3, !"cluster_dim_y", i32 5, !"cluster_dim_z", i32 7}
+!1 = !{ptr @kernel_func_clusterxyz, !"cluster_dim_x", i32 3, !"cluster_dim_y", i32 5, !"cluster_dim_z", i32 7}
diff --git a/llvm/test/CodeGen/NVPTX/generic-to-nvvm.ll b/llvm/test/CodeGen/NVPTX/generic-to-nvvm.ll
index 43e4dfca1456d4..2b6631154e3876 100644
--- a/llvm/test/CodeGen/NVPTX/generic-to-nvvm.ll
+++ b/llvm/test/CodeGen/NVPTX/generic-to-nvvm.ll
@@ -12,7 +12,7 @@ target triple = "nvptx-nvidia-cuda"
@myconst = internal constant i32 420, align 4
-define void @foo(ptr %a, ptr %b) {
+define ptx_kernel void @foo(ptr %a, ptr %b) {
; Expect one load -- @myconst isn't loaded from, because we know its value
; statically.
; CHECK: ld.global.u32
@@ -24,7 +24,3 @@ define void @foo(ptr %a, ptr %b) {
store i32 %ld2, ptr %b
ret void
}
-
-
-!nvvm.annotations = !{!0}
-!0 = !{ptr @foo, !"kernel", i32 1}
diff --git a/llvm/test/CodeGen/NVPTX/i1-array-global.ll b/llvm/test/CodeGen/NVPTX/i1-array-global.ll
index ff3848b6f8f752..20b376f94c0d98 100644
--- a/llvm/test/CodeGen/NVPTX/i1-array-global.ll
+++ b/llvm/test/CodeGen/NVPTX/i1-array-global.ll
@@ -7,13 +7,9 @@ target triple = "nvptx-nvidia-cuda"
@global_cst = private constant [6 x i1] [i1 true, i1 false, i1 true, i1 false, i1 true, i1 false]
; CHECK: .global .align 1 .b8 global_cst[6] = {1, 0, 1, 0, 1}
-define void @kernel(i32 %i, ptr %out) {
+define ptx_kernel void @kernel(i32 %i, ptr %out) {
%5 = getelementptr inbounds i1, ptr @global_cst, i32 %i
%6 = load i1, ptr %5, align 1
store i1 %6, ptr %out, align 1
ret void
}
-
-!nvvm.annotations = !{!0}
-!0 = !{ptr @kernel, !"kernel", i32 1}
-
diff --git a/llvm/test/CodeGen/NVPTX/i1-ext-load.ll b/llvm/test/CodeGen/NVPTX/i1-ext-load.ll
index 83f8f80919f802..f5f1dd9fcf0ea3 100644
--- a/llvm/test/CodeGen/NVPTX/i1-ext-load.ll
+++ b/llvm/test/CodeGen/NVPTX/i1-ext-load.ll
@@ -5,7 +5,7 @@
target triple = "nvptx-nvidia-cuda"
-define void @foo(ptr noalias readonly %ptr, ptr noalias %retval) {
+define ptx_kernel void @foo(ptr noalias readonly %ptr, ptr noalias %retval) {
; CHECK-LABEL: foo(
; CHECK: .reg .b16 %rs<2>;
; CHECK: .reg .b32 %r<4>;
@@ -28,7 +28,3 @@ define void @foo(ptr noalias readonly %ptr, ptr noalias %retval) {
store i32 %and, ptr %retval
ret void
}
-
-!nvvm.annotations = !{!0}
-
-!0 = !{ptr @foo, !"kernel", i32 1}
diff --git a/llvm/test/CodeGen/NVPTX/i1-global.ll b/llvm/test/CodeGen/NVPTX/i1-global.ll
index 17af1fa29e6c25..60d2ccd4641941 100644
--- a/llvm/test/CodeGen/NVPTX/i1-global.ll
+++ b/llvm/test/CodeGen/NVPTX/i1-global.ll
@@ -8,13 +8,9 @@ target triple = "nvptx-nvidia-cuda"
@mypred = addrspace(1) global i1 true, align 1
-define void @foo(i1 %p, ptr %out) {
+define ptx_kernel void @foo(i1 %p, ptr %out) {
%ld = load i1, ptr addrspace(1) @mypred
%val = zext i1 %ld to i32
store i32 %val, ptr %out
ret void
}
-
-
-!nvvm.annotations = !{!0}
-!0 = !{ptr @foo, !"kernel", i32 1}
diff --git a/llvm/test/CodeGen/NVPTX/i1-param.ll b/llvm/test/CodeGen/NVPTX/i1-param.ll
index 3c74ee6aaa3b5c..14d417bca459d2 100644
--- a/llvm/test/CodeGen/NVPTX/i1-param.ll
+++ b/llvm/test/CodeGen/NVPTX/i1-param.ll
@@ -9,12 +9,8 @@ target triple = "nvptx-nvidia-cuda"
; CHECK: .entry foo
; CHECK: .param .u8 foo_param_0
; CHECK: .param .u64 .ptr .align 1 foo_param_1
-define void @foo(i1 %p, ptr %out) {
+define ptx_kernel void @foo(i1 %p, ptr %out) {
%val = zext i1 %p to i32
store i32 %val, ptr %out
ret void
}
-
-
-!nvvm.annotations = !{!0}
-!0 = !{ptr @foo, !"kernel", i32 1}
diff --git a/llvm/test/CodeGen/NVPTX/intr-range.ll b/llvm/test/CodeGen/NVPTX/intr-range.ll
index 2f3e08a039f52e..86776ab09efc61 100644
--- a/llvm/test/CodeGen/NVPTX/intr-range.ll
+++ b/llvm/test/CodeGen/NVPTX/intr-range.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --version 5
; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -mcpu=sm_20 -passes=nvvm-intr-range | FileCheck %s
-define i32 @test_maxntid() {
-; CHECK-LABEL: define i32 @test_maxntid(
+define ptx_kernel i32 @test_maxntid() {
+; CHECK-LABEL: define ptx_kernel i32 @test_maxntid(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[TMP1:%.*]] = call range(i32 0, 96) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
; CHECK-NEXT: [[TMP3:%.*]] = call range(i32 0, 96) i32 @llvm.nvvm.read.ptx.sreg.tid.y()
@@ -31,8 +31,8 @@ define i32 @test_maxntid() {
ret i32 %11
}
-define i32 @test_reqntid() {
-; CHECK-LABEL: define i32 @test_reqntid(
+define ptx_kernel i32 @test_reqntid() {
+; CHECK-LABEL: define ptx_kernel i32 @test_reqntid(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = call range(i32 0, 20) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
; CHECK-NEXT: [[TMP5:%.*]] = call range(i32 0, 20) i32 @llvm.nvvm.read.ptx.sreg.tid.y()
@@ -64,8 +64,8 @@ define i32 @test_reqntid() {
;; A case like this could occur if a function with the sreg intrinsic was
;; inlined into a kernel where the tid metadata is present, ensure the range is
;; updated.
-define i32 @test_inlined() {
-; CHECK-LABEL: define i32 @test_inlined(
+define ptx_kernel i32 @test_inlined() {
+; CHECK-LABEL: define ptx_kernel i32 @test_inlined(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = call range(i32 0, 4) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
; CHECK-NEXT: ret i32 [[TMP1]]
@@ -83,6 +83,6 @@ declare i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
declare i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
!nvvm.annotations = !{!0, !1, !2}
-!0 = !{ptr @test_maxntid, !"kernel", i32 1, !"maxntidx", i32 32, !"maxntidz", i32 3}
-!1 = !{ptr @test_reqntid, !"kernel", i32 1, !"reqntidx", i32 20}
-!2 = !{ptr @test_inlined, !"kernel", i32 1, !"maxntidx", i32 4}
+!0 = !{ptr @test_maxntid, !"maxntidx", i32 32, !"maxntidz", i32 3}
+!1 = !{ptr @test_reqntid, !"reqntidx", i32 20}
+!2 = !{ptr @test_inlined, !"maxntidx", i32 4}
diff --git a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
index 93d428d6fe6f48..2889d2d89a8579 100644
--- a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
+++ b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
@@ -10,7 +10,7 @@
; CHECK: .param .u64 .ptr .shared .align 8 func_align_param_3
; CHECK: .param .u64 .ptr .const .align 16 func_align_param_4
; CHECK: .param .u64 .ptr .local .align 32 func_align_param_5
-define void @func_align(ptr nocapture readonly align 1 %input,
+define ptx_kernel void @func_align(ptr nocapture readonly align 1 %input,
ptr nocapture align 2 %out,
ptr addrspace(1) align 4 %global,
ptr addrspace(3) align 8 %shared,
@@ -27,7 +27,7 @@ entry:
; CHECK: .param .u64 .ptr .shared .align 1 func_noalign_param_3
; CHECK: .param .u64 .ptr .const .align 1 func_noalign_param_4
; CHECK: .param .u64 .ptr .local .align 1 func_noalign_param_5
-define void @func_noalign(ptr nocapture readonly %input,
+define ptx_kernel void @func_noalign(ptr nocapture readonly %input,
ptr nocapture %out,
ptr addrspace(1) %global,
ptr addrspace(3) %shared,
@@ -36,7 +36,3 @@ define void @func_noalign(ptr nocapture readonly %input,
entry:
ret void
}
-
-!nvvm.annotations = !{!0, !1}
-!0 = !{ptr @func_align, !"kernel", i32 1}
-!1 = !{ptr @func_noalign, !"kernel", i32 1}
diff --git a/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll b/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll
index bdaeccd53fac98..dc1917f3b15078 100644
--- a/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll
+++ b/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll
@@ -10,7 +10,7 @@ target triple = "nvptx64-unknown-unknown"
; SM20: ld.global.f32
; SM35-LABEL: .visible .entry foo1(
; SM35: ld.global.nc.f32
-define void @foo1(ptr noalias readonly %from, ptr %to) {
+define ptx_kernel void @foo1(ptr noalias readonly %from, ptr %to) {
%1 = load float, ptr %from
store float %1, ptr %to
ret void
@@ -20,7 +20,7 @@ define void @foo1(ptr noalias readonly %from, ptr %to) {
; SM20: ld.global.f64
; SM35-LABEL: .visible .entry foo2(
; SM35: ld.global.nc.f64
-define void @foo2(ptr noalias readonly %from, ptr %to) {
+define ptx_kernel void @foo2(ptr noalias readonly %from, ptr %to) {
%1 = load double, ptr %from
store double %1, ptr %to
ret void
@@ -30,7 +30,7 @@ define void @foo2(ptr noalias readonly %from, ptr %to) {
; SM20: ld.global.u16
; SM35-LABEL: .visible .entry foo3(
; SM35: ld.global.nc.u16
-define void @foo3(ptr noalias readonly %from, ptr %to) {
+define ptx_kernel void @foo3(ptr noalias readonly %from, ptr %to) {
%1 = load i16, ptr %from
store i16 %1, ptr %to
ret void
@@ -40,7 +40,7 @@ define void @foo3(ptr noalias readonly %from, ptr %to) {
; SM20: ld.global.u32
; SM35-LABEL: .visible .entry foo4(
; SM35: ld.global.nc.u32
-define void @foo4(ptr noalias readonly %from, ptr %to) {
+define ptx_kernel void @foo4(ptr noalias readonly %from, ptr %to) {
%1 = load i32, ptr %from
store i32 %1, ptr %to
ret void
@@ -50,7 +50,7 @@ define void @foo4(ptr noalias readonly %from, ptr %to) {
; SM20: ld.global.u64
; SM35-LABEL: .visible .entry foo5(
; SM35: ld.global.nc.u64
-define void @foo5(ptr noalias readonly %from, ptr %to) {
+define ptx_kernel void @foo5(ptr noalias readonly %from, ptr %to) {
%1 = load i64, ptr %from
store i64 %1, ptr %to
ret void
@@ -63,7 +63,7 @@ define void @foo5(ptr noalias readonly %from, ptr %to) {
; SM35-LABEL: .visible .entry foo6(
; SM35: ld.global.nc.u64
; SM35: ld.global.nc.u64
-define void @foo6(ptr noalias readonly %from, ptr %to) {
+define ptx_kernel void @foo6(ptr noalias readonly %from, ptr %to) {
%1 = load i128, ptr %from
store i128 %1, ptr %to
ret void
@@ -73,7 +73,7 @@ define void @foo6(ptr noalias readonly %from, ptr %to) {
; SM20: ld.global.v2.u8
; SM35-LABEL: .visible .entry foo7(
; SM35: ld.global.nc.v2.u8
-define void @foo7(ptr noalias readonly %from, ptr %to) {
+define ptx_kernel void @foo7(ptr noalias readonly %from, ptr %to) {
%1 = load <2 x i8>, ptr %from
store <2 x i8> %1, ptr %to
ret void
@@ -83,7 +83,7 @@ define void @foo7(ptr noalias readonly %from, ptr %to) {
; SM20: ld.global.u32
; SM35-LABEL: .visible .entry foo8(
; SM35: ld.global.nc.u32
-define void @foo8(ptr noalias readonly %from, ptr %to) {
+define ptx_kernel void @foo8(ptr noalias readonly %from, ptr %to) {
%1 = load <2 x i16>, ptr %from
store <2 x i16> %1, ptr %to
ret void
@@ -93,7 +93,7 @@ define void @foo8(ptr noalias readonly %from, ptr %to) {
; SM20: ld.global.v2.u32
; SM35-LABEL: .visible .entry foo9(
; SM35: ld.global.nc.v2.u32
-define void @foo9(ptr noalias readonly %from, ptr %to) {
+define ptx_kernel void @foo9(ptr noalias readonly %from, ptr %to) {
%1 = load <2 x i32>, ptr %from
store <2 x i32> %1, ptr %to
ret void
@@ -103,7 +103,7 @@ define void @foo9(ptr noalias readonly %from, ptr %to) {
; SM20: ld.global.v2.u64
; SM35-LABEL: .visible .entry foo10(
; SM35: ld.global.nc.v2.u64
-define void @foo10(ptr noalias readonly %from, ptr %to) {
+define ptx_kernel void @foo10(ptr noalias readonly %from, ptr %to) {
%1 = load <2 x i64>, ptr %from
store <2 x i64> %1, ptr %to
ret void
@@ -113,7 +113,7 @@ define void @foo10(ptr noalias readonly %from, ptr %to) {
; SM20: ld.global.v2.f32
; SM35-LABEL: .visible .entry foo11(
; SM35: ld.global.nc.v2.f32
-define void @foo11(ptr noalias readonly %from, ptr %to) {
+define ptx_kernel void @foo11(ptr noalias readonly %from, ptr %to) {
%1 = load <2 x float>, ptr %from
store <2 x float> %1, ptr %to
ret void
@@ -123,7 +123,7 @@ define void @foo11(ptr noalias readonly %from, ptr %to) {
; SM20: ld.global.v2.f64
; SM35-LABEL: .visible .entry foo12(
; SM35: ld.global.nc.v2.f64
-define void @foo12(ptr noalias readonly %from, ptr %to) {
+define ptx_kernel void @foo12(ptr noalias readonly %from, ptr %to) {
%1 = load <2 x double>, ptr %from
store <2 x double> %1, ptr %to
ret void
@@ -133,7 +133,7 @@ define void @foo12(ptr noalias readonly %from, ptr %to) {
; SM20: ld.global.u32
; SM35-LABEL: .visible .entry foo13(
; SM35: ld.global.nc.u32
-define void @foo13(ptr noalias readonly %from, ptr %to) {
+define ptx_kernel void @foo13(ptr noalias readonly %from, ptr %to) {
%1 = load <4 x i8>, ptr %from
store <4 x i8> %1, ptr %to
ret void
@@ -143,7 +143,7 @@ define void @foo13(ptr noalias readonly %from, ptr %to) {
; SM20: ld.global.v4.u16
; SM35-LABEL: .visible .entry foo14(
; SM35: ld.global.nc.v4.u16
-define void @foo14(ptr noalias readonly %from, ptr %to) {
+define ptx_kernel void @foo14(ptr noalias readonly %from, ptr %to) {
%1 = load <4 x i16>, ptr %from
store <4 x i16> %1, ptr %to
ret void
@@ -153,7 +153,7 @@ define void @foo14(ptr noalias readonly %from, ptr %to) {
; SM20: ld.global.v4.u32
; SM35-LABEL: .visible .entry foo15(
; SM35: ld.global.nc.v4.u32
-define void @foo15(ptr noalias readonly %from, ptr %to) {
+define ptx_kernel void @foo15(ptr noalias readonly %from, ptr %to) {
%1 = load <4 x i32>, ptr %from
store <4 x i32> %1, ptr %to
ret void
@@ -163,7 +163,7 @@ define void @foo15(ptr noalias readonly %from, ptr %to) {
; SM20: ld.global.v4.f32
; SM35-LABEL: .visible .entry foo16(
; SM35: ld.global.nc.v4.f32
-define void @foo16(ptr noalias readonly %from, ptr %to) {
+define ptx_kernel void @foo16(ptr noalias readonly %from, ptr %to) {
%1 = load <4 x float>, ptr %from
store <4 x float> %1, ptr %to
ret void
@@ -175,7 +175,7 @@ define void @foo16(ptr noalias readonly %from, ptr %to) {
; SM35-LABEL: .visible .entry foo17(
; SM35: ld.global.nc.v2.f64
; SM35: ld.global.nc.v2.f64
-define void @foo17(ptr noalias readonly %from, ptr %to) {
+define ptx_kernel void @foo17(ptr noalias readonly %from, ptr %to) {
%1 = load <4 x double>, ptr %from
store <4 x double> %1, ptr %to
ret void
@@ -185,7 +185,7 @@ define void @foo17(ptr noalias readonly %from, ptr %to) {
; SM20: ld.global.u64
; SM35-LABEL: .visible .entry foo18(
; SM35: ld.global.nc.u64
-define void @foo18(ptr noalias readonly %from, ptr %to) {
+define ptx_kernel void @foo18(ptr noalias readonly %from, ptr %to) {
%1 = load ptr, ptr %from
store ptr %1, ptr %to
ret void
@@ -196,7 +196,7 @@ define void @foo18(ptr noalias readonly %from, ptr %to) {
; SM20: ld.global.f32
; SM35-LABEL: .visible .entry foo19(
; SM35: ld.global.nc.f32
-define void @foo19(ptr noalias readonly %from, ptr %to, i32 %n) {
+define ptx_kernel void @foo19(ptr noalias readonly %from, ptr %to, i32 %n) {
entry:
br label %loop
@@ -243,24 +243,3 @@ define void @notkernel2(ptr addrspace(1) noalias readonly %from, ptr %to) {
store float %1, ptr %to
ret void
}
-
-!nvvm.annotations = !{!1 ,!2 ,!3 ,!4 ,!5 ,!6, !7 ,!8 ,!9 ,!10 ,!11 ,!12, !13, !14, !15, !16, !17, !18, !19}
-!1 = !{ptr @foo1, !"kernel", i32 1}
-!2 = !{ptr @foo2, !"kernel", i32 1}
-!3 = !{ptr @foo3, !"kernel", i32 1}
-!4 = !{ptr @foo4, !"kernel", i32 1}
-!5 = !{ptr @foo5, !"kernel", i32 1}
-!6 = !{ptr @foo6, !"kernel", i32 1}
-!7 = !{ptr @foo7, !"kernel", i32 1}
-!8 = !{ptr @foo8, !"kernel", i32 1}
-!9 = !{ptr @foo9, !"kernel", i32 1}
-!10 = !{ptr @foo10, !"kernel", i32 1}
-!11 = !{ptr @foo11, !"kernel", i32 1}
-!12 = !{ptr @foo12, !"kernel", i32 1}
-!13 = !{ptr @foo13, !"kernel", i32 1}
-!14 = !{ptr @foo14, !"kernel", i32 1}
-!15 = !{ptr @foo15, !"kernel", i32 1}
-!16 = !{ptr @foo16, !"kernel", i32 1}
-!17 = !{ptr @foo17, !"kernel", i32 1}
-!18 = !{ptr @foo18, !"kernel", i32 1}
-!19 = !{ptr @foo19, !"kernel", i32 1}
diff --git a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
index e42f2303cdf7c3..f21ff974a2c6bb 100644
--- a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
+++ b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
@@ -29,7 +29,7 @@ define void @foo(i32 %a) {
; PTX64: ld.param.u32 %r{{[0-9]+}}, [foo2_param_0];
; PTX64: add.u64 %rd[[SP_REG:[0-9]+]], %SPL, 0;
; PTX64: st.local.u32 [%rd[[SP_REG]]], %r{{[0-9]+}};
-define void @foo2(i32 %a) {
+define ptx_kernel void @foo2(i32 %a) {
%local = alloca i32, align 4
store i32 %a, ptr %local
call void @bar(ptr %local)
@@ -38,8 +38,6 @@ define void @foo2(i32 %a) {
declare void @bar(ptr %a)
-!nvvm.annotations = !{!0}
-!0 = !{ptr @foo2, !"kernel", i32 1}
; PTX32: mov.u32 %SPL, __local_depot{{[0-9]+}};
; PTX32-NOT: cvta.local.u32 %SP, %SPL;
diff --git a/llvm/test/CodeGen/NVPTX/lower-alloca.ll b/llvm/test/CodeGen/NVPTX/lower-alloca.ll
index 8f2d55151b3113..530b48b3d3e37e 100644
--- a/llvm/test/CodeGen/NVPTX/lower-alloca.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-alloca.ll
@@ -6,7 +6,7 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
target triple = "nvptx64-unknown-unknown"
-define void @kernel() {
+define ptx_kernel void @kernel() {
; LABEL: @lower_alloca
; PTX-LABEL: .visible .entry kernel(
%A = alloca i32
@@ -37,7 +37,5 @@ define void @alloca_in_explicit_local_as() {
declare void @callee(ptr)
declare void @callee_addrspace5(ptr addrspace(5))
-!nvvm.annotations = !{!0}
!nvvm.annotations = !{!1}
-!0 = !{ptr @kernel, !"kernel", i32 1}
!1 = !{ptr @alloca_in_explicit_local_as, !"alloca_in_explicit_local_as", i32 1}
diff --git a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
index 9cfe9192772b89..27cf8ca5b61d69 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
@@ -68,7 +68,7 @@ entry:
ret i32 %0, !dbg !23
}
-define void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %input2, ptr %out, i32 %n) {
+define ptx_kernel void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %input2, ptr %out, i32 %n) {
; PTX-LABEL: grid_const_int(
; PTX: {
; PTX-NEXT: .reg .b32 %r<4>;
@@ -82,7 +82,7 @@ define void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %input2, ptr %ou
; PTX-NEXT: add.s32 %r3, %r2, %r1;
; PTX-NEXT: st.global.u32 [%rd2], %r3;
; PTX-NEXT: ret;
-; OPT-LABEL: define void @grid_const_int(
+; OPT-LABEL: define ptx_kernel void @grid_const_int(
; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], i32 [[INPUT2:%.*]], ptr [[OUT:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
; OPT-NEXT: [[OUT2:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
; OPT-NEXT: [[OUT3:%.*]] = addrspacecast ptr addrspace(1) [[OUT2]] to ptr
@@ -91,6 +91,7 @@ define void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %input2, ptr %ou
; OPT-NEXT: [[ADD:%.*]] = add i32 [[TMP]], [[INPUT2]]
; OPT-NEXT: store i32 [[ADD]], ptr [[OUT3]], align 4
; OPT-NEXT: ret void
+;
%tmp = load i32, ptr %input1, align 4
%add = add i32 %tmp, %input2
store i32 %add, ptr %out
@@ -99,7 +100,7 @@ define void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %input2, ptr %ou
%struct.s = type { i32, i32 }
-define void @grid_const_struct(ptr byval(%struct.s) align 4 %input, ptr %out){
+define ptx_kernel void @grid_const_struct(ptr byval(%struct.s) align 4 %input, ptr %out){
; PTX-LABEL: grid_const_struct(
; PTX: {
; PTX-NEXT: .reg .b32 %r<4>;
@@ -113,7 +114,7 @@ define void @grid_const_struct(ptr byval(%struct.s) align 4 %input, ptr %out){
; PTX-NEXT: add.s32 %r3, %r1, %r2;
; PTX-NEXT: st.global.u32 [%rd2], %r3;
; PTX-NEXT: ret;
-; OPT-LABEL: define void @grid_const_struct(
+; OPT-LABEL: define ptx_kernel void @grid_const_struct(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[OUT:%.*]]) #[[ATTR0]] {
; OPT-NEXT: [[OUT4:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
; OPT-NEXT: [[OUT5:%.*]] = addrspacecast ptr addrspace(1) [[OUT4]] to ptr
@@ -125,6 +126,7 @@ define void @grid_const_struct(ptr byval(%struct.s) align 4 %input, ptr %out){
; OPT-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[TMP2]]
; OPT-NEXT: store i32 [[ADD]], ptr [[OUT5]], align 4
; OPT-NEXT: ret void
+;
%gep1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0
%gep2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1
%int1 = load i32, ptr %gep1
@@ -134,7 +136,7 @@ define void @grid_const_struct(ptr byval(%struct.s) align 4 %input, ptr %out){
ret void
}
-define void @grid_const_escape(ptr byval(%struct.s) align 4 %input) {
+define ptx_kernel void @grid_const_escape(ptr byval(%struct.s) align 4 %input) {
; PTX-LABEL: grid_const_escape(
; PTX: {
; PTX-NEXT: .reg .b32 %r<3>;
@@ -159,17 +161,18 @@ define void @grid_const_escape(ptr byval(%struct.s) align 4 %input) {
; PTX-NEXT: ld.param.b32 %r1, [retval0];
; PTX-NEXT: } // callseq 0
; PTX-NEXT: ret;
-; OPT-LABEL: define void @grid_const_escape(
+; OPT-LABEL: define ptx_kernel void @grid_const_escape(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]]) #[[ATTR0]] {
; OPT-NEXT: [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
; OPT-NEXT: [[CALL:%.*]] = call i32 @escape(ptr [[INPUT_PARAM_GEN]])
; OPT-NEXT: ret void
+;
%call = call i32 @escape(ptr %input)
ret void
}
-define void @multiple_grid_const_escape(ptr byval(%struct.s) align 4 %input, i32 %a, ptr byval(i32) align 4 %b) {
+define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4 %input, i32 %a, ptr byval(i32) align 4 %b) {
; PTX-LABEL: multiple_grid_const_escape(
; PTX: {
; PTX-NEXT: .local .align 4 .b8 __local_depot4[4];
@@ -212,7 +215,7 @@ define void @multiple_grid_const_escape(ptr byval(%struct.s) align 4 %input, i32
; PTX-NEXT: ld.param.b32 %r2, [retval0];
; PTX-NEXT: } // callseq 1
; PTX-NEXT: ret;
-; OPT-LABEL: define void @multiple_grid_const_escape(
+; OPT-LABEL: define ptx_kernel void @multiple_grid_const_escape(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], i32 [[A:%.*]], ptr byval(i32) align 4 [[B:%.*]]) #[[ATTR0]] {
; OPT-NEXT: [[B_PARAM:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(101)
; OPT-NEXT: [[B_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[B_PARAM]])
@@ -222,13 +225,14 @@ define void @multiple_grid_const_escape(ptr byval(%struct.s) align 4 %input, i32
; OPT-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
; OPT-NEXT: [[CALL:%.*]] = call i32 @escape3(ptr [[INPUT_PARAM_GEN]], ptr [[A_ADDR]], ptr [[B_PARAM_GEN]])
; OPT-NEXT: ret void
+;
%a.addr = alloca i32, align 4
store i32 %a, ptr %a.addr, align 4
%call = call i32 @escape3(ptr %input, ptr %a.addr, ptr %b)
ret void
}
-define void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %input, ptr %addr) {
+define ptx_kernel void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %input, ptr %addr) {
; PTX-LABEL: grid_const_memory_escape(
; PTX: {
; PTX-NEXT: .reg .b64 %rd<6>;
@@ -241,7 +245,7 @@ define void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %input, ptr %
; PTX-NEXT: cvta.param.u64 %rd5, %rd4;
; PTX-NEXT: st.global.u64 [%rd3], %rd5;
; PTX-NEXT: ret;
-; OPT-LABEL: define void @grid_const_memory_escape(
+; OPT-LABEL: define ptx_kernel void @grid_const_memory_escape(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[ADDR:%.*]]) #[[ATTR0]] {
; OPT-NEXT: [[ADDR4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
; OPT-NEXT: [[ADDR5:%.*]] = addrspacecast ptr addrspace(1) [[ADDR4]] to ptr
@@ -249,11 +253,12 @@ define void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %input, ptr %
; OPT-NEXT: [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
; OPT-NEXT: store ptr [[INPUT1]], ptr [[ADDR5]], align 8
; OPT-NEXT: ret void
+;
store ptr %input, ptr %addr, align 8
ret void
}
-define void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4 %input, ptr %result) {
+define ptx_kernel void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4 %input, ptr %result) {
; PTX-LABEL: grid_const_inlineasm_escape(
; PTX: {
; PTX-NEXT: .reg .b64 %rd<8>;
@@ -271,7 +276,7 @@ define void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4 %input, pt
; PTX-NEXT: st.global.u64 [%rd6], %rd1;
; PTX-NEXT: ret;
; PTX-NOT .local
-; OPT-LABEL: define void @grid_const_inlineasm_escape(
+; OPT-LABEL: define ptx_kernel void @grid_const_inlineasm_escape(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[RESULT:%.*]]) #[[ATTR0]] {
; OPT-NEXT: [[RESULT4:%.*]] = addrspacecast ptr [[RESULT]] to ptr addrspace(1)
; OPT-NEXT: [[RESULT5:%.*]] = addrspacecast ptr addrspace(1) [[RESULT4]] to ptr
@@ -282,6 +287,7 @@ define void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4 %input, pt
; OPT-NEXT: [[TMP2:%.*]] = call i64 asm "add.s64 $0, $1, $2
; OPT-NEXT: store i64 [[TMP2]], ptr [[RESULT5]], align 8
; OPT-NEXT: ret void
+;
%tmpptr1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0
%tmpptr2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1
%1 = call i64 asm "add.s64 $0, $1, $2;", "=l,l,l"(ptr %tmpptr1, ptr %tmpptr2) #1
@@ -289,7 +295,7 @@ define void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4 %input, pt
ret void
}
-define void @grid_const_partial_escape(ptr byval(i32) %input, ptr %output) {
+define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %output) {
; PTX-LABEL: grid_const_partial_escape(
; PTX: {
; PTX-NEXT: .reg .b32 %r<5>;
@@ -319,7 +325,7 @@ define void @grid_const_partial_escape(ptr byval(i32) %input, ptr %output) {
; PTX-NEXT: ld.param.b32 %r3, [retval0];
; PTX-NEXT: } // callseq 2
; PTX-NEXT: ret;
-; OPT-LABEL: define void @grid_const_partial_escape(
+; OPT-LABEL: define ptx_kernel void @grid_const_partial_escape(
; OPT-SAME: ptr byval(i32) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) #[[ATTR0]] {
; OPT-NEXT: [[OUTPUT4:%.*]] = addrspacecast ptr [[OUTPUT]] to ptr addrspace(1)
; OPT-NEXT: [[OUTPUT5:%.*]] = addrspacecast ptr addrspace(1) [[OUTPUT4]] to ptr
@@ -330,6 +336,7 @@ define void @grid_const_partial_escape(ptr byval(i32) %input, ptr %output) {
; OPT-NEXT: store i32 [[TWICE]], ptr [[OUTPUT5]], align 4
; OPT-NEXT: [[CALL:%.*]] = call i32 @escape(ptr [[INPUT1_GEN]])
; OPT-NEXT: ret void
+;
%val = load i32, ptr %input
%twice = add i32 %val, %val
store i32 %twice, ptr %output
@@ -337,7 +344,7 @@ define void @grid_const_partial_escape(ptr byval(i32) %input, ptr %output) {
ret void
}
-define i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input, ptr %output) {
+define ptx_kernel i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input, ptr %output) {
; PTX-LABEL: grid_const_partial_escapemem(
; PTX: {
; PTX-NEXT: .reg .b32 %r<6>;
@@ -369,7 +376,7 @@ define i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input, ptr %outpu
; PTX-NEXT: } // callseq 3
; PTX-NEXT: st.param.b32 [func_retval0], %r3;
; PTX-NEXT: ret;
-; OPT-LABEL: define i32 @grid_const_partial_escapemem(
+; OPT-LABEL: define ptx_kernel i32 @grid_const_partial_escapemem(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) #[[ATTR0]] {
; OPT-NEXT: [[OUTPUT4:%.*]] = addrspacecast ptr [[OUTPUT]] to ptr addrspace(1)
; OPT-NEXT: [[OUTPUT5:%.*]] = addrspacecast ptr addrspace(1) [[OUTPUT4]] to ptr
@@ -383,6 +390,7 @@ define i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input, ptr %outpu
; OPT-NEXT: [[ADD:%.*]] = add i32 [[VAL1]], [[VAL2]]
; OPT-NEXT: [[CALL2:%.*]] = call i32 @escape(ptr [[PTR1]])
; OPT-NEXT: ret i32 [[ADD]]
+;
%ptr1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0
%val1 = load i32, ptr %ptr1
%ptr2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1
@@ -393,7 +401,7 @@ define i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input, ptr %outpu
ret i32 %add
}
-define void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr %inout) {
+define ptx_kernel void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr %inout) {
; PTX-LABEL: grid_const_phi(
; PTX: {
; PTX-NEXT: .reg .pred %p<2>;
@@ -415,7 +423,7 @@ define void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr %inout) {
; PTX-NEXT: ld.u32 %r2, [%rd8];
; PTX-NEXT: st.global.u32 [%rd1], %r2;
; PTX-NEXT: ret;
-; OPT-LABEL: define void @grid_const_phi(
+; OPT-LABEL: define ptx_kernel void @grid_const_phi(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
; OPT-NEXT: [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1)
; OPT-NEXT: [[INOUT2:%.*]] = addrspacecast ptr addrspace(1) [[INOUT1]] to ptr
@@ -435,6 +443,7 @@ define void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr %inout) {
; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
; OPT-NEXT: ret void
+;
%val = load i32, ptr %inout
%less = icmp slt i32 %val, 0
@@ -453,7 +462,7 @@ merge:
}
; NOTE: %input2 is *not* grid_constant
-define void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1, ptr byval(%struct.s) %input2, ptr %inout) {
+define ptx_kernel void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1, ptr byval(%struct.s) %input2, ptr %inout) {
; PTX-LABEL: grid_const_phi_ngc(
; PTX: {
; PTX-NEXT: .reg .pred %p<2>;
@@ -478,7 +487,7 @@ define void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1, ptr byval(
; PTX-NEXT: ld.u32 %r2, [%rd11];
; PTX-NEXT: st.global.u32 [%rd1], %r2;
; PTX-NEXT: ret;
-; OPT-LABEL: define void @grid_const_phi_ngc(
+; OPT-LABEL: define ptx_kernel void @grid_const_phi_ngc(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
; OPT-NEXT: [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1)
; OPT-NEXT: [[INOUT2:%.*]] = addrspacecast ptr addrspace(1) [[INOUT1]] to ptr
@@ -500,6 +509,7 @@ define void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1, ptr byval(
; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
; OPT-NEXT: ret void
+;
%val = load i32, ptr %inout
%less = icmp slt i32 %val, 0
br i1 %less, label %first, label %second
@@ -517,7 +527,7 @@ merge:
}
; NOTE: %input2 is *not* grid_constant
-define void @grid_const_select(ptr byval(i32) align 4 %input1, ptr byval(i32) %input2, ptr %inout) {
+define ptx_kernel void @grid_const_select(ptr byval(i32) align 4 %input1, ptr byval(i32) %input2, ptr %inout) {
; PTX-LABEL: grid_const_select(
; PTX: {
; PTX-NEXT: .reg .pred %p<2>;
@@ -539,7 +549,7 @@ define void @grid_const_select(ptr byval(i32) align 4 %input1, ptr byval(i32) %i
; PTX-NEXT: ld.u32 %r2, [%rd9];
; PTX-NEXT: st.global.u32 [%rd3], %r2;
; PTX-NEXT: ret;
-; OPT-LABEL: define void @grid_const_select(
+; OPT-LABEL: define ptx_kernel void @grid_const_select(
; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
; OPT-NEXT: [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1)
; OPT-NEXT: [[INOUT2:%.*]] = addrspacecast ptr addrspace(1) [[INOUT1]] to ptr
@@ -553,6 +563,7 @@ define void @grid_const_select(ptr byval(i32) align 4 %input1, ptr byval(i32) %i
; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
; OPT-NEXT: ret void
+;
%val = load i32, ptr %inout
%less = icmp slt i32 %val, 0
%ptrnew = select i1 %less, ptr %input1, ptr %input2
@@ -561,7 +572,7 @@ define void @grid_const_select(ptr byval(i32) align 4 %input1, ptr byval(i32) %i
ret void
}
-define i32 @grid_const_ptrtoint(ptr byval(i32) %input) {
+define ptx_kernel i32 @grid_const_ptrtoint(ptr byval(i32) %input) {
; PTX-LABEL: grid_const_ptrtoint(
; PTX: {
; PTX-NEXT: .reg .b32 %r<4>;
@@ -576,7 +587,7 @@ define i32 @grid_const_ptrtoint(ptr byval(i32) %input) {
; PTX-NEXT: add.s32 %r3, %r1, %r2;
; PTX-NEXT: st.param.b32 [func_retval0], %r3;
; PTX-NEXT: ret;
-; OPT-LABEL: define i32 @grid_const_ptrtoint(
+; OPT-LABEL: define ptx_kernel i32 @grid_const_ptrtoint(
; OPT-SAME: ptr byval(i32) align 4 [[INPUT:%.*]]) #[[ATTR0]] {
; OPT-NEXT: [[INPUT2:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT3:%.*]] = load i32, ptr addrspace(101) [[INPUT2]], align 4
@@ -584,6 +595,7 @@ define i32 @grid_const_ptrtoint(ptr byval(i32) %input) {
; OPT-NEXT: [[PTRVAL:%.*]] = ptrtoint ptr [[INPUT1]] to i32
; OPT-NEXT: [[KEEPALIVE:%.*]] = add i32 [[INPUT3]], [[PTRVAL]]
; OPT-NEXT: ret i32 [[KEEPALIVE]]
+;
%val = load i32, ptr %input
%ptrval = ptrtoint ptr %input to i32
%keepalive = add i32 %val, %ptrval
@@ -598,40 +610,40 @@ declare dso_local ptr @escape3(ptr, ptr, ptr) local_unnamed_addr
!nvvm.annotations = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23}
-!0 = !{ptr @grid_const_int, !"kernel", i32 1, !"grid_constant", !1}
+!0 = !{ptr @grid_const_int, !"grid_constant", !1}
!1 = !{i32 1}
-!2 = !{ptr @grid_const_struct, !"kernel", i32 1, !"grid_constant", !3}
+!2 = !{ptr @grid_const_struct, !"grid_constant", !3}
!3 = !{i32 1}
-!4 = !{ptr @grid_const_escape, !"kernel", i32 1, !"grid_constant", !5}
+!4 = !{ptr @grid_const_escape, !"grid_constant", !5}
!5 = !{i32 1}
-!6 = !{ptr @multiple_grid_const_escape, !"kernel", i32 1, !"grid_constant", !7}
+!6 = !{ptr @multiple_grid_const_escape, !"grid_constant", !7}
!7 = !{i32 1, i32 3}
-!8 = !{ptr @grid_const_memory_escape, !"kernel", i32 1, !"grid_constant", !9}
+!8 = !{ptr @grid_const_memory_escape, !"grid_constant", !9}
!9 = !{i32 1}
-!10 = !{ptr @grid_const_inlineasm_escape, !"kernel", i32 1, !"grid_constant", !11}
+!10 = !{ptr @grid_const_inlineasm_escape, !"grid_constant", !11}
!11 = !{i32 1}
-!12 = !{ptr @grid_const_partial_escape, !"kernel", i32 1, !"grid_constant", !13}
+!12 = !{ptr @grid_const_partial_escape, !"grid_constant", !13}
!13 = !{i32 1}
-!14 = !{ptr @grid_const_partial_escapemem, !"kernel", i32 1, !"grid_constant", !15}
+!14 = !{ptr @grid_const_partial_escapemem, !"grid_constant", !15}
!15 = !{i32 1}
-!16 = !{ptr @grid_const_phi, !"kernel", i32 1, !"grid_constant", !17}
+!16 = !{ptr @grid_const_phi, !"grid_constant", !17}
!17 = !{i32 1}
-!18 = !{ptr @grid_const_phi_ngc, !"kernel", i32 1, !"grid_constant", !19}
+!18 = !{ptr @grid_const_phi_ngc, !"grid_constant", !19}
!19 = !{i32 1}
-!20 = !{ptr @grid_const_select, !"kernel", i32 1, !"grid_constant", !21}
+!20 = !{ptr @grid_const_select, !"grid_constant", !21}
!21 = !{i32 1}
-!22 = !{ptr @grid_const_ptrtoint, !"kernel", i32 1, !"grid_constant", !23}
+!22 = !{ptr @grid_const_ptrtoint, !"grid_constant", !23}
!23 = !{i32 1}
diff --git a/llvm/test/CodeGen/NVPTX/lower-args.ll b/llvm/test/CodeGen/NVPTX/lower-args.ll
index eba4f273fa709d..269bba75dc5fb3 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args.ll
@@ -65,7 +65,7 @@ define void @load_padding(ptr nocapture readonly byval(%class.padded) %arg) {
}
; COMMON-LABEL: ptr_generic
-define void @ptr_generic(ptr %out, ptr %in) {
+define ptx_kernel void @ptr_generic(ptr %out, ptr %in) {
; IRC: %in3 = addrspacecast ptr %in to ptr addrspace(1)
; IRC: %in4 = addrspacecast ptr addrspace(1) %in3 to ptr
; IRC: %out1 = addrspacecast ptr %out to ptr addrspace(1)
@@ -87,7 +87,7 @@ define void @ptr_generic(ptr %out, ptr %in) {
}
; COMMON-LABEL: ptr_nongeneric
-define void @ptr_nongeneric(ptr addrspace(1) %out, ptr addrspace(4) %in) {
+define ptx_kernel void @ptr_nongeneric(ptr addrspace(1) %out, ptr addrspace(4) %in) {
; IR-NOT: addrspacecast
; PTX-NOT: cvta.to.global
; PTX: ld.const.u32
@@ -98,7 +98,7 @@ define void @ptr_nongeneric(ptr addrspace(1) %out, ptr addrspace(4) %in) {
}
; COMMON-LABEL: ptr_as_int
- define void @ptr_as_int(i64 noundef %i, i32 noundef %v) {
+ define ptx_kernel void @ptr_as_int(i64 noundef %i, i32 noundef %v) {
; IR: [[P:%.*]] = inttoptr i64 %i to ptr
; IRC: [[P1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
; IRC: addrspacecast ptr addrspace(1) [[P1]] to ptr
@@ -121,7 +121,7 @@ define void @ptr_nongeneric(ptr addrspace(1) %out, ptr addrspace(4) %in) {
%struct.S = type { i64 }
; COMMON-LABEL: ptr_as_int_aggr
-define void @ptr_as_int_aggr(ptr nocapture noundef readonly byval(%struct.S) align 8 %s, i32 noundef %v) {
+define ptx_kernel void @ptr_as_int_aggr(ptr nocapture noundef readonly byval(%struct.S) align 8 %s, i32 noundef %v) {
; IR: [[S:%.*]] = addrspacecast ptr %s to ptr addrspace(101)
; IR: [[I:%.*]] = load i64, ptr addrspace(101) [[S]], align 8
; IR: [[P0:%.*]] = inttoptr i64 [[I]] to ptr
@@ -146,8 +146,3 @@ define void @ptr_as_int_aggr(ptr nocapture noundef readonly byval(%struct.S) ali
; Function Attrs: convergent nounwind
declare dso_local ptr @escape(ptr) local_unnamed_addr
-!nvvm.annotations = !{!0, !1, !2, !3}
-!0 = !{ptr @ptr_generic, !"kernel", i32 1}
-!1 = !{ptr @ptr_nongeneric, !"kernel", i32 1}
-!2 = !{ptr @ptr_as_int, !"kernel", i32 1}
-!3 = !{ptr @ptr_as_int_aggr, !"kernel", i32 1}
diff --git a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
index 5c52626a711fed..26102722a483ba 100644
--- a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
@@ -24,8 +24,8 @@ declare void @llvm.memmove.p0.p0.i64(ptr nocapture writeonly, ptr nocapture read
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #2
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
-define dso_local void @read_only(ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) align 4 %s) local_unnamed_addr #0 {
-; SM_60-LABEL: define dso_local void @read_only(
+define dso_local ptx_kernel void @read_only(ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) align 4 %s) local_unnamed_addr #0 {
+; SM_60-LABEL: define dso_local ptx_kernel void @read_only(
; SM_60-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; SM_60-NEXT: [[ENTRY:.*:]]
; SM_60-NEXT: [[S3:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
@@ -35,7 +35,7 @@ define dso_local void @read_only(ptr nocapture noundef writeonly %out, ptr nocap
; SM_60-NEXT: store i32 [[I]], ptr [[OUT2]], align 4
; SM_60-NEXT: ret void
;
-; SM_70-LABEL: define dso_local void @read_only(
+; SM_70-LABEL: define dso_local ptx_kernel void @read_only(
; SM_70-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; SM_70-NEXT: [[ENTRY:.*:]]
; SM_70-NEXT: [[S3:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
@@ -45,7 +45,7 @@ define dso_local void @read_only(ptr nocapture noundef writeonly %out, ptr nocap
; SM_70-NEXT: store i32 [[I]], ptr [[OUT2]], align 4
; SM_70-NEXT: ret void
;
-; COPY-LABEL: define dso_local void @read_only(
+; COPY-LABEL: define dso_local ptx_kernel void @read_only(
; COPY-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; COPY-NEXT: [[ENTRY:.*:]]
; COPY-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4
@@ -62,8 +62,8 @@ entry:
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
-define dso_local void @read_only_gep(ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) align 4 %s) local_unnamed_addr #0 {
-; SM_60-LABEL: define dso_local void @read_only_gep(
+define dso_local ptx_kernel void @read_only_gep(ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) align 4 %s) local_unnamed_addr #0 {
+; SM_60-LABEL: define dso_local ptx_kernel void @read_only_gep(
; SM_60-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; SM_60-NEXT: [[ENTRY:.*:]]
; SM_60-NEXT: [[S3:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
@@ -74,7 +74,7 @@ define dso_local void @read_only_gep(ptr nocapture noundef writeonly %out, ptr n
; SM_60-NEXT: store i32 [[I]], ptr [[OUT2]], align 4
; SM_60-NEXT: ret void
;
-; SM_70-LABEL: define dso_local void @read_only_gep(
+; SM_70-LABEL: define dso_local ptx_kernel void @read_only_gep(
; SM_70-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; SM_70-NEXT: [[ENTRY:.*:]]
; SM_70-NEXT: [[S3:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
@@ -85,7 +85,7 @@ define dso_local void @read_only_gep(ptr nocapture noundef writeonly %out, ptr n
; SM_70-NEXT: store i32 [[I]], ptr [[OUT2]], align 4
; SM_70-NEXT: ret void
;
-; COPY-LABEL: define dso_local void @read_only_gep(
+; COPY-LABEL: define dso_local ptx_kernel void @read_only_gep(
; COPY-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; COPY-NEXT: [[ENTRY:.*:]]
; COPY-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4
@@ -104,8 +104,8 @@ entry:
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
-define dso_local void @read_only_gep_asc(ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) align 4 %s) local_unnamed_addr #0 {
-; SM_60-LABEL: define dso_local void @read_only_gep_asc(
+define dso_local ptx_kernel void @read_only_gep_asc(ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) align 4 %s) local_unnamed_addr #0 {
+; SM_60-LABEL: define dso_local ptx_kernel void @read_only_gep_asc(
; SM_60-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; SM_60-NEXT: [[ENTRY:.*:]]
; SM_60-NEXT: [[S3:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
@@ -116,7 +116,7 @@ define dso_local void @read_only_gep_asc(ptr nocapture noundef writeonly %out, p
; SM_60-NEXT: store i32 [[I]], ptr [[OUT2]], align 4
; SM_60-NEXT: ret void
;
-; SM_70-LABEL: define dso_local void @read_only_gep_asc(
+; SM_70-LABEL: define dso_local ptx_kernel void @read_only_gep_asc(
; SM_70-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; SM_70-NEXT: [[ENTRY:.*:]]
; SM_70-NEXT: [[S3:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
@@ -127,7 +127,7 @@ define dso_local void @read_only_gep_asc(ptr nocapture noundef writeonly %out, p
; SM_70-NEXT: store i32 [[I]], ptr [[OUT2]], align 4
; SM_70-NEXT: ret void
;
-; COPY-LABEL: define dso_local void @read_only_gep_asc(
+; COPY-LABEL: define dso_local ptx_kernel void @read_only_gep_asc(
; COPY-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; COPY-NEXT: [[ENTRY:.*:]]
; COPY-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4
@@ -148,8 +148,8 @@ entry:
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
-define dso_local void @read_only_gep_asc0(ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) align 4 %s) local_unnamed_addr #0 {
-; SM_60-LABEL: define dso_local void @read_only_gep_asc0(
+define dso_local ptx_kernel void @read_only_gep_asc0(ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) align 4 %s) local_unnamed_addr #0 {
+; SM_60-LABEL: define dso_local ptx_kernel void @read_only_gep_asc0(
; SM_60-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; SM_60-NEXT: [[ENTRY:.*:]]
; SM_60-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
@@ -164,7 +164,7 @@ define dso_local void @read_only_gep_asc0(ptr nocapture noundef writeonly %out,
; SM_60-NEXT: store i32 [[I]], ptr [[OUT2]], align 4
; SM_60-NEXT: ret void
;
-; SM_70-LABEL: define dso_local void @read_only_gep_asc0(
+; SM_70-LABEL: define dso_local ptx_kernel void @read_only_gep_asc0(
; SM_70-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; SM_70-NEXT: [[ENTRY:.*:]]
; SM_70-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
@@ -179,7 +179,7 @@ define dso_local void @read_only_gep_asc0(ptr nocapture noundef writeonly %out,
; SM_70-NEXT: store i32 [[I]], ptr [[OUT2]], align 4
; SM_70-NEXT: ret void
;
-; COPY-LABEL: define dso_local void @read_only_gep_asc0(
+; COPY-LABEL: define dso_local ptx_kernel void @read_only_gep_asc0(
; COPY-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; COPY-NEXT: [[ENTRY:.*:]]
; COPY-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4
@@ -202,8 +202,8 @@ entry:
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
-define dso_local void @escape_ptr(ptr nocapture noundef readnone %out, ptr noundef byval(%struct.S) align 4 %s) local_unnamed_addr #0 {
-; SM_60-LABEL: define dso_local void @escape_ptr(
+define dso_local ptx_kernel void @escape_ptr(ptr nocapture noundef readnone %out, ptr noundef byval(%struct.S) align 4 %s) local_unnamed_addr #0 {
+; SM_60-LABEL: define dso_local ptx_kernel void @escape_ptr(
; SM_60-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; SM_60-NEXT: [[ENTRY:.*:]]
; SM_60-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
@@ -214,7 +214,7 @@ define dso_local void @escape_ptr(ptr nocapture noundef readnone %out, ptr nound
; SM_60-NEXT: call void @_Z6escapePv(ptr noundef nonnull [[S3]])
; SM_60-NEXT: ret void
;
-; SM_70-LABEL: define dso_local void @escape_ptr(
+; SM_70-LABEL: define dso_local ptx_kernel void @escape_ptr(
; SM_70-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; SM_70-NEXT: [[ENTRY:.*:]]
; SM_70-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
@@ -225,7 +225,7 @@ define dso_local void @escape_ptr(ptr nocapture noundef readnone %out, ptr nound
; SM_70-NEXT: call void @_Z6escapePv(ptr noundef nonnull [[S3]])
; SM_70-NEXT: ret void
;
-; COPY-LABEL: define dso_local void @escape_ptr(
+; COPY-LABEL: define dso_local ptx_kernel void @escape_ptr(
; COPY-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; COPY-NEXT: [[ENTRY:.*:]]
; COPY-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4
@@ -240,8 +240,8 @@ entry:
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
-define dso_local void @escape_ptr_gep(ptr nocapture noundef readnone %out, ptr noundef byval(%struct.S) align 4 %s) local_unnamed_addr #0 {
-; SM_60-LABEL: define dso_local void @escape_ptr_gep(
+define dso_local ptx_kernel void @escape_ptr_gep(ptr nocapture noundef readnone %out, ptr noundef byval(%struct.S) align 4 %s) local_unnamed_addr #0 {
+; SM_60-LABEL: define dso_local ptx_kernel void @escape_ptr_gep(
; SM_60-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; SM_60-NEXT: [[ENTRY:.*:]]
; SM_60-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
@@ -253,7 +253,7 @@ define dso_local void @escape_ptr_gep(ptr nocapture noundef readnone %out, ptr n
; SM_60-NEXT: call void @_Z6escapePv(ptr noundef nonnull [[B]])
; SM_60-NEXT: ret void
;
-; SM_70-LABEL: define dso_local void @escape_ptr_gep(
+; SM_70-LABEL: define dso_local ptx_kernel void @escape_ptr_gep(
; SM_70-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; SM_70-NEXT: [[ENTRY:.*:]]
; SM_70-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
@@ -265,7 +265,7 @@ define dso_local void @escape_ptr_gep(ptr nocapture noundef readnone %out, ptr n
; SM_70-NEXT: call void @_Z6escapePv(ptr noundef nonnull [[B]])
; SM_70-NEXT: ret void
;
-; COPY-LABEL: define dso_local void @escape_ptr_gep(
+; COPY-LABEL: define dso_local ptx_kernel void @escape_ptr_gep(
; COPY-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; COPY-NEXT: [[ENTRY:.*:]]
; COPY-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4
@@ -282,8 +282,8 @@ entry:
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
-define dso_local void @escape_ptr_store(ptr nocapture noundef writeonly %out, ptr noundef byval(%struct.S) align 4 %s) local_unnamed_addr #0 {
-; SM_60-LABEL: define dso_local void @escape_ptr_store(
+define dso_local ptx_kernel void @escape_ptr_store(ptr nocapture noundef writeonly %out, ptr noundef byval(%struct.S) align 4 %s) local_unnamed_addr #0 {
+; SM_60-LABEL: define dso_local ptx_kernel void @escape_ptr_store(
; SM_60-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; SM_60-NEXT: [[ENTRY:.*:]]
; SM_60-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
@@ -294,7 +294,7 @@ define dso_local void @escape_ptr_store(ptr nocapture noundef writeonly %out, pt
; SM_60-NEXT: store ptr [[S3]], ptr [[OUT2]], align 8
; SM_60-NEXT: ret void
;
-; SM_70-LABEL: define dso_local void @escape_ptr_store(
+; SM_70-LABEL: define dso_local ptx_kernel void @escape_ptr_store(
; SM_70-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; SM_70-NEXT: [[ENTRY:.*:]]
; SM_70-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
@@ -305,7 +305,7 @@ define dso_local void @escape_ptr_store(ptr nocapture noundef writeonly %out, pt
; SM_70-NEXT: store ptr [[S3]], ptr [[OUT2]], align 8
; SM_70-NEXT: ret void
;
-; COPY-LABEL: define dso_local void @escape_ptr_store(
+; COPY-LABEL: define dso_local ptx_kernel void @escape_ptr_store(
; COPY-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; COPY-NEXT: [[ENTRY:.*:]]
; COPY-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4
@@ -320,8 +320,8 @@ entry:
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
-define dso_local void @escape_ptr_gep_store(ptr nocapture noundef writeonly %out, ptr noundef byval(%struct.S) align 4 %s) local_unnamed_addr #0 {
-; SM_60-LABEL: define dso_local void @escape_ptr_gep_store(
+define dso_local ptx_kernel void @escape_ptr_gep_store(ptr nocapture noundef writeonly %out, ptr noundef byval(%struct.S) align 4 %s) local_unnamed_addr #0 {
+; SM_60-LABEL: define dso_local ptx_kernel void @escape_ptr_gep_store(
; SM_60-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; SM_60-NEXT: [[ENTRY:.*:]]
; SM_60-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
@@ -333,7 +333,7 @@ define dso_local void @escape_ptr_gep_store(ptr nocapture noundef writeonly %out
; SM_60-NEXT: store ptr [[B]], ptr [[OUT2]], align 8
; SM_60-NEXT: ret void
;
-; SM_70-LABEL: define dso_local void @escape_ptr_gep_store(
+; SM_70-LABEL: define dso_local ptx_kernel void @escape_ptr_gep_store(
; SM_70-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; SM_70-NEXT: [[ENTRY:.*:]]
; SM_70-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
@@ -345,7 +345,7 @@ define dso_local void @escape_ptr_gep_store(ptr nocapture noundef writeonly %out
; SM_70-NEXT: store ptr [[B]], ptr [[OUT2]], align 8
; SM_70-NEXT: ret void
;
-; COPY-LABEL: define dso_local void @escape_ptr_gep_store(
+; COPY-LABEL: define dso_local ptx_kernel void @escape_ptr_gep_store(
; COPY-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; COPY-NEXT: [[ENTRY:.*:]]
; COPY-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4
@@ -362,8 +362,8 @@ entry:
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
-define dso_local void @escape_ptrtoint(ptr nocapture noundef writeonly %out, ptr noundef byval(%struct.S) align 4 %s) local_unnamed_addr #0 {
-; SM_60-LABEL: define dso_local void @escape_ptrtoint(
+define dso_local ptx_kernel void @escape_ptrtoint(ptr nocapture noundef writeonly %out, ptr noundef byval(%struct.S) align 4 %s) local_unnamed_addr #0 {
+; SM_60-LABEL: define dso_local ptx_kernel void @escape_ptrtoint(
; SM_60-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; SM_60-NEXT: [[ENTRY:.*:]]
; SM_60-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
@@ -375,7 +375,7 @@ define dso_local void @escape_ptrtoint(ptr nocapture noundef writeonly %out, ptr
; SM_60-NEXT: store i64 [[I]], ptr [[OUT2]], align 8
; SM_60-NEXT: ret void
;
-; SM_70-LABEL: define dso_local void @escape_ptrtoint(
+; SM_70-LABEL: define dso_local ptx_kernel void @escape_ptrtoint(
; SM_70-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; SM_70-NEXT: [[ENTRY:.*:]]
; SM_70-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
@@ -387,7 +387,7 @@ define dso_local void @escape_ptrtoint(ptr nocapture noundef writeonly %out, ptr
; SM_70-NEXT: store i64 [[I]], ptr [[OUT2]], align 8
; SM_70-NEXT: ret void
;
-; COPY-LABEL: define dso_local void @escape_ptrtoint(
+; COPY-LABEL: define dso_local ptx_kernel void @escape_ptrtoint(
; COPY-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; COPY-NEXT: [[ENTRY:.*:]]
; COPY-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4
@@ -404,8 +404,8 @@ entry:
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
-define dso_local void @memcpy_from_param(ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) align 4 %s) local_unnamed_addr #0 {
-; SM_60-LABEL: define dso_local void @memcpy_from_param(
+define dso_local ptx_kernel void @memcpy_from_param(ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) align 4 %s) local_unnamed_addr #0 {
+; SM_60-LABEL: define dso_local ptx_kernel void @memcpy_from_param(
; SM_60-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; SM_60-NEXT: [[ENTRY:.*:]]
; SM_60-NEXT: [[S3:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
@@ -414,7 +414,7 @@ define dso_local void @memcpy_from_param(ptr nocapture noundef writeonly %out, p
; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr [[OUT2]], ptr addrspace(101) [[S3]], i64 16, i1 true)
; SM_60-NEXT: ret void
;
-; SM_70-LABEL: define dso_local void @memcpy_from_param(
+; SM_70-LABEL: define dso_local ptx_kernel void @memcpy_from_param(
; SM_70-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; SM_70-NEXT: [[ENTRY:.*:]]
; SM_70-NEXT: [[S3:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
@@ -423,7 +423,7 @@ define dso_local void @memcpy_from_param(ptr nocapture noundef writeonly %out, p
; SM_70-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr [[OUT2]], ptr addrspace(101) [[S3]], i64 16, i1 true)
; SM_70-NEXT: ret void
;
-; COPY-LABEL: define dso_local void @memcpy_from_param(
+; COPY-LABEL: define dso_local ptx_kernel void @memcpy_from_param(
; COPY-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; COPY-NEXT: [[ENTRY:.*:]]
; COPY-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4
@@ -438,8 +438,8 @@ entry:
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
-define dso_local void @memcpy_from_param_noalign (ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) %s) local_unnamed_addr #0 {
-; SM_60-LABEL: define dso_local void @memcpy_from_param_noalign(
+define dso_local ptx_kernel void @memcpy_from_param_noalign (ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) %s) local_unnamed_addr #0 {
+; SM_60-LABEL: define dso_local ptx_kernel void @memcpy_from_param_noalign(
; SM_60-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; SM_60-NEXT: [[ENTRY:.*:]]
; SM_60-NEXT: [[S3:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
@@ -448,7 +448,7 @@ define dso_local void @memcpy_from_param_noalign (ptr nocapture noundef writeonl
; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr [[OUT2]], ptr addrspace(101) [[S3]], i64 16, i1 true)
; SM_60-NEXT: ret void
;
-; SM_70-LABEL: define dso_local void @memcpy_from_param_noalign(
+; SM_70-LABEL: define dso_local ptx_kernel void @memcpy_from_param_noalign(
; SM_70-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; SM_70-NEXT: [[ENTRY:.*:]]
; SM_70-NEXT: [[S3:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
@@ -457,7 +457,7 @@ define dso_local void @memcpy_from_param_noalign (ptr nocapture noundef writeonl
; SM_70-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr [[OUT2]], ptr addrspace(101) [[S3]], i64 16, i1 true)
; SM_70-NEXT: ret void
;
-; COPY-LABEL: define dso_local void @memcpy_from_param_noalign(
+; COPY-LABEL: define dso_local ptx_kernel void @memcpy_from_param_noalign(
; COPY-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; COPY-NEXT: [[ENTRY:.*:]]
; COPY-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 8
@@ -472,8 +472,8 @@ entry:
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
-define dso_local void @memcpy_to_param(ptr nocapture noundef readonly %in, ptr nocapture noundef readnone byval(%struct.S) align 4 %s) local_unnamed_addr #0 {
-; SM_60-LABEL: define dso_local void @memcpy_to_param(
+define dso_local ptx_kernel void @memcpy_to_param(ptr nocapture noundef readonly %in, ptr nocapture noundef readnone byval(%struct.S) align 4 %s) local_unnamed_addr #0 {
+; SM_60-LABEL: define dso_local ptx_kernel void @memcpy_to_param(
; SM_60-SAME: ptr nocapture noundef readonly [[IN:%.*]], ptr nocapture noundef readnone byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; SM_60-NEXT: [[ENTRY:.*:]]
; SM_60-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
@@ -484,7 +484,7 @@ define dso_local void @memcpy_to_param(ptr nocapture noundef readonly %in, ptr n
; SM_60-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[S3]], ptr [[IN2]], i64 16, i1 true)
; SM_60-NEXT: ret void
;
-; SM_70-LABEL: define dso_local void @memcpy_to_param(
+; SM_70-LABEL: define dso_local ptx_kernel void @memcpy_to_param(
; SM_70-SAME: ptr nocapture noundef readonly [[IN:%.*]], ptr nocapture noundef readnone byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; SM_70-NEXT: [[ENTRY:.*:]]
; SM_70-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
@@ -495,7 +495,7 @@ define dso_local void @memcpy_to_param(ptr nocapture noundef readonly %in, ptr n
; SM_70-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[S3]], ptr [[IN2]], i64 16, i1 true)
; SM_70-NEXT: ret void
;
-; COPY-LABEL: define dso_local void @memcpy_to_param(
+; COPY-LABEL: define dso_local ptx_kernel void @memcpy_to_param(
; COPY-SAME: ptr nocapture noundef readonly [[IN:%.*]], ptr nocapture noundef readnone byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
; COPY-NEXT: [[ENTRY:.*:]]
; COPY-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4
@@ -510,8 +510,8 @@ entry:
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
-define dso_local void @copy_on_store(ptr nocapture noundef readonly %in, ptr nocapture noundef byval(%struct.S) align 4 %s, i1 noundef zeroext %b) local_unnamed_addr #0 {
-; SM_60-LABEL: define dso_local void @copy_on_store(
+define dso_local ptx_kernel void @copy_on_store(ptr nocapture noundef readonly %in, ptr nocapture noundef byval(%struct.S) align 4 %s, i1 noundef zeroext %b) local_unnamed_addr #0 {
+; SM_60-LABEL: define dso_local ptx_kernel void @copy_on_store(
; SM_60-SAME: ptr nocapture noundef readonly [[IN:%.*]], ptr nocapture noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]], i1 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
; SM_60-NEXT: [[BB:.*:]]
; SM_60-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
@@ -523,7 +523,7 @@ define dso_local void @copy_on_store(ptr nocapture noundef readonly %in, ptr noc
; SM_60-NEXT: store i32 [[I]], ptr [[S3]], align 4
; SM_60-NEXT: ret void
;
-; SM_70-LABEL: define dso_local void @copy_on_store(
+; SM_70-LABEL: define dso_local ptx_kernel void @copy_on_store(
; SM_70-SAME: ptr nocapture noundef readonly [[IN:%.*]], ptr nocapture noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]], i1 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
; SM_70-NEXT: [[BB:.*:]]
; SM_70-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
@@ -535,7 +535,7 @@ define dso_local void @copy_on_store(ptr nocapture noundef readonly %in, ptr noc
; SM_70-NEXT: store i32 [[I]], ptr [[S3]], align 4
; SM_70-NEXT: ret void
;
-; COPY-LABEL: define dso_local void @copy_on_store(
+; COPY-LABEL: define dso_local ptx_kernel void @copy_on_store(
; COPY-SAME: ptr nocapture noundef readonly [[IN:%.*]], ptr nocapture noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]], i1 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
; COPY-NEXT: [[BB:.*:]]
; COPY-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4
@@ -551,8 +551,8 @@ bb:
ret void
}
-define void @test_select(ptr byval(i32) align 4 %input1, ptr byval(i32) %input2, ptr %out, i1 %cond) {
-; SM_60-LABEL: define void @test_select(
+define ptx_kernel void @test_select(ptr byval(i32) align 4 %input1, ptr byval(i32) %input2, ptr %out, i1 %cond) {
+; SM_60-LABEL: define ptx_kernel void @test_select(
; SM_60-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[OUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] {
; SM_60-NEXT: [[BB:.*:]]
; SM_60-NEXT: [[OUT7:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
@@ -568,7 +568,7 @@ define void @test_select(ptr byval(i32) align 4 %input1, ptr byval(i32) %input2,
; SM_60-NEXT: store i32 [[VALLOADED]], ptr [[OUT8]], align 4
; SM_60-NEXT: ret void
;
-; SM_70-LABEL: define void @test_select(
+; SM_70-LABEL: define ptx_kernel void @test_select(
; SM_70-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[OUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] {
; SM_70-NEXT: [[BB:.*:]]
; SM_70-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
@@ -582,7 +582,7 @@ define void @test_select(ptr byval(i32) align 4 %input1, ptr byval(i32) %input2,
; SM_70-NEXT: store i32 [[VALLOADED]], ptr [[OUT2]], align 4
; SM_70-NEXT: ret void
;
-; COPY-LABEL: define void @test_select(
+; COPY-LABEL: define ptx_kernel void @test_select(
; COPY-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[OUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] {
; COPY-NEXT: [[BB:.*:]]
; COPY-NEXT: [[INPUT23:%.*]] = alloca i32, align 4
@@ -603,8 +603,8 @@ bb:
ret void
}
-define void @test_select_write(ptr byval(i32) align 4 %input1, ptr byval(i32) %input2, ptr %out, i1 %cond) {
-; SM_60-LABEL: define void @test_select_write(
+define ptx_kernel void @test_select_write(ptr byval(i32) align 4 %input1, ptr byval(i32) %input2, ptr %out, i1 %cond) {
+; SM_60-LABEL: define ptx_kernel void @test_select_write(
; SM_60-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[OUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] {
; SM_60-NEXT: [[BB:.*:]]
; SM_60-NEXT: [[OUT5:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
@@ -619,7 +619,7 @@ define void @test_select_write(ptr byval(i32) align 4 %input1, ptr byval(i32) %i
; SM_60-NEXT: store i32 1, ptr [[PTRNEW]], align 4
; SM_60-NEXT: ret void
;
-; SM_70-LABEL: define void @test_select_write(
+; SM_70-LABEL: define ptx_kernel void @test_select_write(
; SM_70-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[OUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] {
; SM_70-NEXT: [[BB:.*:]]
; SM_70-NEXT: [[OUT5:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
@@ -634,7 +634,7 @@ define void @test_select_write(ptr byval(i32) align 4 %input1, ptr byval(i32) %i
; SM_70-NEXT: store i32 1, ptr [[PTRNEW]], align 4
; SM_70-NEXT: ret void
;
-; COPY-LABEL: define void @test_select_write(
+; COPY-LABEL: define ptx_kernel void @test_select_write(
; COPY-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[OUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] {
; COPY-NEXT: [[BB:.*:]]
; COPY-NEXT: [[INPUT23:%.*]] = alloca i32, align 4
@@ -653,8 +653,8 @@ bb:
ret void
}
-define void @test_phi(ptr byval(%struct.S) align 4 %input1, ptr byval(%struct.S) %input2, ptr %inout, i1 %cond) {
-; SM_60-LABEL: define void @test_phi(
+define ptx_kernel void @test_phi(ptr byval(%struct.S) align 4 %input1, ptr byval(%struct.S) %input2, ptr %inout, i1 %cond) {
+; SM_60-LABEL: define ptx_kernel void @test_phi(
; SM_60-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) [[INPUT2:%.*]], ptr [[INOUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] {
; SM_60-NEXT: [[BB:.*:]]
; SM_60-NEXT: [[INOUT7:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1)
@@ -678,7 +678,7 @@ define void @test_phi(ptr byval(%struct.S) align 4 %input1, ptr byval(%struct.S)
; SM_60-NEXT: store i32 [[VALLOADED]], ptr [[INOUT8]], align 4
; SM_60-NEXT: ret void
;
-; SM_70-LABEL: define void @test_phi(
+; SM_70-LABEL: define ptx_kernel void @test_phi(
; SM_70-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) [[INPUT2:%.*]], ptr [[INOUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] {
; SM_70-NEXT: [[BB:.*:]]
; SM_70-NEXT: [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1)
@@ -700,7 +700,7 @@ define void @test_phi(ptr byval(%struct.S) align 4 %input1, ptr byval(%struct.S)
; SM_70-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
; SM_70-NEXT: ret void
;
-; COPY-LABEL: define void @test_phi(
+; COPY-LABEL: define ptx_kernel void @test_phi(
; COPY-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) [[INPUT2:%.*]], ptr [[INOUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] {
; COPY-NEXT: [[BB:.*:]]
; COPY-NEXT: [[INPUT23:%.*]] = alloca [[STRUCT_S]], align 8
@@ -740,8 +740,8 @@ merge: ; preds = %second, %first
ret void
}
-define void @test_phi_write(ptr byval(%struct.S) align 4 %input1, ptr byval(%struct.S) %input2, i1 %cond) {
-; COMMON-LABEL: define void @test_phi_write(
+define ptx_kernel void @test_phi_write(ptr byval(%struct.S) align 4 %input1, ptr byval(%struct.S) %input2, i1 %cond) {
+; COMMON-LABEL: define ptx_kernel void @test_phi_write(
; COMMON-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) [[INPUT2:%.*]], i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] {
; COMMON-NEXT: [[BB:.*:]]
; COMMON-NEXT: [[INPUT24:%.*]] = alloca [[STRUCT_S]], align 8
@@ -784,29 +784,11 @@ attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: readwrite
attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: write) }
!llvm.module.flags = !{!0, !1, !2, !3}
-!nvvm.annotations = !{!4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !23}
!llvm.ident = !{!20, !21}
!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 11, i32 8]}
!1 = !{i32 1, !"wchar_size", i32 4}
!2 = !{i32 4, !"nvvm-reflect-ftz", i32 0}
!3 = !{i32 7, !"frame-pointer", i32 2}
-!4 = !{ptr @read_only, !"kernel", i32 1}
-!5 = !{ptr @escape_ptr, !"kernel", i32 1}
-!6 = !{ptr @escape_ptr_gep, !"kernel", i32 1}
-!7 = !{ptr @escape_ptr_store, !"kernel", i32 1}
-!8 = !{ptr @escape_ptr_gep_store, !"kernel", i32 1}
-!9 = !{ptr @escape_ptrtoint, !"kernel", i32 1}
-!10 = !{ptr @memcpy_from_param, !"kernel", i32 1}
-!11 = !{ptr @memcpy_to_param, !"kernel", i32 1}
-!12 = !{ptr @copy_on_store, !"kernel", i32 1}
-!13 = !{ptr @read_only_gep, !"kernel", i32 1}
-!14 = !{ptr @read_only_gep_asc, !"kernel", i32 1}
-!15 = !{ptr @read_only_gep_asc0, !"kernel", i32 1}
-!16 = !{ptr @test_select, !"kernel", i32 1}
-!17 = !{ptr @test_phi, !"kernel", i32 1}
-!18 = !{ptr @test_phi_write, !"kernel", i32 1}
-!19 = !{ptr @test_select_write, !"kernel", i32 1}
!20 = !{!"clang version 20.0.0git"}
!21 = !{!"clang version 3.8.0 (tags/RELEASE_380/final)"}
-!23 = !{ptr @memcpy_from_param_noalign, !"kernel", i32 1}
diff --git a/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll b/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll
index f8b3b4b9b8c446..4ee1ca3ad4b1f0 100644
--- a/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll
@@ -43,7 +43,7 @@ define internal void @bar() {
ret void
}
-; CHECK-LABEL: define weak_odr void @"nvptx$device$init"() {
+; CHECK-LABEL: define weak_odr ptx_kernel void @"nvptx$device$init"() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[BEGIN:%.*]] = load ptr addrspace(1), ptr addrspace(1) @__init_array_start, align 8
; CHECK-NEXT: [[STOP:%.*]] = load ptr addrspace(1), ptr addrspace(1) @__init_array_end, align 8
@@ -60,7 +60,7 @@ define internal void @bar() {
; CHECK-NEXT: ret void
;
;
-; CHECK-LABEL: define weak_odr void @"nvptx$device$fini"() {
+; CHECK-LABEL: define weak_odr ptx_kernel void @"nvptx$device$fini"() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[BEGIN:%.*]] = load ptr addrspace(1), ptr addrspace(1) @__fini_array_start, align 8
; CHECK-NEXT: [[STOP:%.*]] = load ptr addrspace(1), ptr addrspace(1) @__fini_array_end, align 8
@@ -82,12 +82,10 @@ define internal void @bar() {
; CHECK: while.end:
; CHECK-NEXT: ret void
-; CHECK: [[META0:![0-9]+]] = !{ptr @"nvptx$device$init", !"kernel", i32 1}
; CHECK: [[META1:![0-9]+]] = !{ptr @"nvptx$device$init", !"maxntidx", i32 1}
; CHECK: [[META2:![0-9]+]] = !{ptr @"nvptx$device$init", !"maxntidy", i32 1}
; CHECK: [[META3:![0-9]+]] = !{ptr @"nvptx$device$init", !"maxntidz", i32 1}
; CHECK: [[META4:![0-9]+]] = !{ptr @"nvptx$device$init", !"maxclusterrank", i32 1}
-; CHECK: [[META5:![0-9]+]] = !{ptr @"nvptx$device$fini", !"kernel", i32 1}
; CHECK: [[META6:![0-9]+]] = !{ptr @"nvptx$device$fini", !"maxntidx", i32 1}
; CHECK: [[META7:![0-9]+]] = !{ptr @"nvptx$device$fini", !"maxntidy", i32 1}
; CHECK: [[META8:![0-9]+]] = !{ptr @"nvptx$device$fini", !"maxntidz", i32 1}
diff --git a/llvm/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll b/llvm/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll
index 9ec690a68e7ea7..2e64c255948115 100644
--- a/llvm/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll
@@ -6,7 +6,7 @@ target triple = "nvptx64-nvidia-cuda"
; Verify that both %input and %output are converted to global pointers and then
; addrspacecast'ed back to the original type.
-define void @kernel(ptr %input, ptr %output) {
+define ptx_kernel void @kernel(ptr %input, ptr %output) {
; CHECK-LABEL: .visible .entry kernel(
; CHECK: cvta.to.global.u64
; CHECK: cvta.to.global.u64
@@ -17,7 +17,7 @@ define void @kernel(ptr %input, ptr %output) {
ret void
}
-define void @kernel2(ptr addrspace(1) %input, ptr addrspace(1) %output) {
+define ptx_kernel void @kernel2(ptr addrspace(1) %input, ptr addrspace(1) %output) {
; CHECK-LABEL: .visible .entry kernel2(
; CHECK-NOT: cvta.to.global.u64
%1 = load float, ptr addrspace(1) %input, align 4
@@ -29,7 +29,7 @@ define void @kernel2(ptr addrspace(1) %input, ptr addrspace(1) %output) {
%struct.S = type { ptr, ptr }
-define void @ptr_in_byval_kernel(ptr byval(%struct.S) %input, ptr %output) {
+define ptx_kernel void @ptr_in_byval_kernel(ptr byval(%struct.S) %input, ptr %output) {
; CHECK-LABEL: .visible .entry ptr_in_byval_kernel(
; CHECK: ld.param.u64 %[[optr:rd.*]], [ptr_in_byval_kernel_param_1]
; CHECK: cvta.to.global.u64 %[[optr_g:.*]], %[[optr]];
@@ -60,7 +60,3 @@ define void @ptr_in_byval_func(ptr byval(%struct.S) %input, ptr %output) {
ret void
}
-!nvvm.annotations = !{!0, !1, !2}
-!0 = !{ptr @kernel, !"kernel", i32 1}
-!1 = !{ptr @kernel2, !"kernel", i32 1}
-!2 = !{ptr @ptr_in_byval_kernel, !"kernel", i32 1}
diff --git a/llvm/test/CodeGen/NVPTX/maxclusterrank.ll b/llvm/test/CodeGen/NVPTX/maxclusterrank.ll
index 3389e090aac578..c445c34c1842a5 100644
--- a/llvm/test/CodeGen/NVPTX/maxclusterrank.ll
+++ b/llvm/test/CodeGen/NVPTX/maxclusterrank.ll
@@ -11,16 +11,15 @@ target triple = "nvptx64-unknown-unknown"
; Make sure that for SM version prior to 90 `.maxclusterrank` directive is
; sielently ignored.
-define dso_local void @_Z18TestMaxClusterRankv() {
+define dso_local ptx_kernel void @_Z18TestMaxClusterRankv() {
entry:
%a = alloca i32, align 4
store volatile i32 1, ptr %a, align 4
ret void
}
-!nvvm.annotations = !{!0, !1, !2, !3}
+!nvvm.annotations = !{!1, !2, !3}
-!0 = !{ptr @_Z18TestMaxClusterRankv, !"kernel", i32 1}
!1 = !{ptr @_Z18TestMaxClusterRankv, !"maxntidx", i32 128}
!2 = !{ptr @_Z18TestMaxClusterRankv, !"minctasm", i32 2}
!3 = !{ptr @_Z18TestMaxClusterRankv, !"maxclusterrank", i32 8}
diff --git a/llvm/test/CodeGen/NVPTX/noduplicate-syncthreads.ll b/llvm/test/CodeGen/NVPTX/noduplicate-syncthreads.ll
index 2bc6d4cfa7f6d2..2a0c5ab7299ba8 100644
--- a/llvm/test/CodeGen/NVPTX/noduplicate-syncthreads.ll
+++ b/llvm/test/CodeGen/NVPTX/noduplicate-syncthreads.ll
@@ -66,7 +66,4 @@ if.end17: ; preds = %if.else13, %if.then
}
; Function Attrs: noduplicate nounwind
-declare void @llvm.nvvm.barrier0() #2
-
-!0 = !{ptr @foo, !"kernel", i32 1}
-!1 = !{null, !"align", i32 8}
+declare void @llvm.nvvm.barrier0() #2
\ No newline at end of file
diff --git a/llvm/test/CodeGen/NVPTX/noreturn.ll b/llvm/test/CodeGen/NVPTX/noreturn.ll
index 2161d70a885299..6c11d0a9376a32 100644
--- a/llvm/test/CodeGen/NVPTX/noreturn.ll
+++ b/llvm/test/CodeGen/NVPTX/noreturn.ll
@@ -27,7 +27,7 @@ define void @true_noreturn0() #0 {
; CHECK: .entry ignore_kernel_noreturn()
; CHECK-NOT: .noreturn
-define void @ignore_kernel_noreturn() #0 {
+define ptx_kernel void @ignore_kernel_noreturn() #0 {
unreachable
}
@@ -35,7 +35,7 @@ define void @ignore_kernel_noreturn() #0 {
; CHECK: prototype_{{[0-9]+}} : .callprototype ()_ (.param .b32 _) .noreturn;
; CHECK: prototype_{{[0-9]+}} : .callprototype (.param .b32 _) _ (.param .b32 _);
-define void @callprototype_noreturn(i32) {
+define ptx_kernel void @callprototype_noreturn(i32) {
%fn = load ptr, ptr addrspace(1) @function_pointer
call void %fn(i32 %0) #0
%non_void = bitcast ptr %fn to ptr
@@ -44,8 +44,3 @@ define void @callprototype_noreturn(i32) {
}
attributes #0 = { noreturn }
-
-!nvvm.annotations = !{!0, !1}
-
-!0 = !{ptr @ignore_kernel_noreturn, !"kernel", i32 1}
-!1 = !{ptr @callprototype_noreturn, !"kernel", i32 1}
diff --git a/llvm/test/CodeGen/NVPTX/nvcl-param-align.ll b/llvm/test/CodeGen/NVPTX/nvcl-param-align.ll
index 48162eaba257de..9a78d31302e157 100644
--- a/llvm/test/CodeGen/NVPTX/nvcl-param-align.ll
+++ b/llvm/test/CodeGen/NVPTX/nvcl-param-align.ll
@@ -3,7 +3,7 @@
target triple = "nvptx-unknown-nvcl"
-define void @foo(i64 %img, i64 %sampler, ptr align 32 %v1, ptr %v2) {
+define ptx_kernel void @foo(i64 %img, i64 %sampler, ptr align 32 %v1, ptr %v2) {
; The parameter alignment is determined by the align attribute (default 1).
; CHECK-LABEL: .entry foo(
; CHECK: .param .u64 .ptr .align 32 foo_param_2
@@ -11,7 +11,6 @@ define void @foo(i64 %img, i64 %sampler, ptr align 32 %v1, ptr %v2) {
ret void
}
-!nvvm.annotations = !{!1, !2, !3}
-!1 = !{ptr @foo, !"kernel", i32 1}
+!nvvm.annotations = !{!2, !3}
!2 = !{ptr @foo, !"rdoimage", i32 0}
!3 = !{ptr @foo, !"sampler", i32 1}
diff --git a/llvm/test/CodeGen/NVPTX/refl1.ll b/llvm/test/CodeGen/NVPTX/refl1.ll
index 34db3bb1a1a9a0..99b83f49ff9b15 100644
--- a/llvm/test/CodeGen/NVPTX/refl1.ll
+++ b/llvm/test/CodeGen/NVPTX/refl1.ll
@@ -5,7 +5,7 @@ target triple = "nvptx-nvidia-cuda"
; Function Attrs: nounwind
; CHECK: .entry foo
-define void @foo(ptr nocapture %a) #0 {
+define ptx_kernel void @foo(ptr nocapture %a) #0 {
%val = load float, ptr %a
%tan = tail call fastcc float @__nv_fast_tanf(float %val)
store float %tan, ptr %a
@@ -34,7 +34,3 @@ entry:
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { alwaysinline inlinehint nounwind readnone }
-
-!nvvm.annotations = !{!0}
-
-!0 = !{ptr @foo, !"kernel", i32 1}
diff --git a/llvm/test/CodeGen/NVPTX/reg-copy.ll b/llvm/test/CodeGen/NVPTX/reg-copy.ll
index f66ef195c625b3..20396c4cc69fe8 100644
--- a/llvm/test/CodeGen/NVPTX/reg-copy.ll
+++ b/llvm/test/CodeGen/NVPTX/reg-copy.ll
@@ -4,7 +4,7 @@
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-unknown-unknown"
-define void @PR24303(ptr %f) {
+define ptx_kernel void @PR24303(ptr %f) {
; CHECK-LABEL: .visible .entry PR24303(
; Do not use mov.f or mov.u to convert between float and int.
; CHECK-NOT: mov.{{f|u}}{{32|64}} %f{{[0-9]+}}, %r{{[0-9]+}}
@@ -217,7 +217,3 @@ _ZN12cuda_builtinmlIfEENS_7complexIT_EERKS3_S5_.exit: ; preds = %if.then.93.i, %
}
declare float @llvm.nvvm.fabs.f(float)
-
-!nvvm.annotations = !{!0}
-
-!0 = !{ptr @PR24303, !"kernel", i32 1}
diff --git a/llvm/test/CodeGen/NVPTX/simple-call.ll b/llvm/test/CodeGen/NVPTX/simple-call.ll
index 3580604d429de3..991ae04b91b67d 100644
--- a/llvm/test/CodeGen/NVPTX/simple-call.ll
+++ b/llvm/test/CodeGen/NVPTX/simple-call.ll
@@ -10,7 +10,7 @@ define float @device_func(float %a) noinline {
}
; CHECK: .entry kernel_func
-define void @kernel_func(ptr %a) {
+define ptx_kernel void @kernel_func(ptr %a) {
%val = load float, ptr %a
; CHECK: call.uni (retval0),
; CHECK: device_func,
@@ -18,9 +18,3 @@ define void @kernel_func(ptr %a) {
store float %mul, ptr %a
ret void
}
-
-
-
-!nvvm.annotations = !{!1}
-
-!1 = !{ptr @kernel_func, !"kernel", i32 1}
diff --git a/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll b/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll
index 504dcdeb3370ce..7a7904a2f04252 100644
--- a/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll
+++ b/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll
@@ -10,7 +10,7 @@ declare i32 @llvm.nvvm.suld.1d.i32.trap(i64, i32)
declare i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1))
-define void @foo(i64 %img, ptr %red, i32 %idx) {
+define ptx_kernel void @foo(i64 %img, ptr %red, i32 %idx) {
; CHECK-LABEL: foo(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
@@ -34,7 +34,7 @@ define void @foo(i64 %img, ptr %red, i32 %idx) {
@surf0 = internal addrspace(1) global i64 0, align 8
-define void @bar(ptr %red, i32 %idx) {
+define ptx_kernel void @bar(ptr %red, i32 %idx) {
; CHECK-LABEL: bar(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
@@ -56,11 +56,5 @@ define void @bar(ptr %red, i32 %idx) {
ret void
}
-
-
-
-!nvvm.annotations = !{!1, !2, !3}
-!1 = !{ptr @foo, !"kernel", i32 1}
-!2 = !{ptr @bar, !"kernel", i32 1}
-!3 = !{ptr addrspace(1) @surf0, !"surface", i32 1}
-
+!nvvm.annotations = !{!1}
+!1 = !{ptr addrspace(1) @surf0, !"surface", i32 1}
diff --git a/llvm/test/CodeGen/NVPTX/surf-read.ll b/llvm/test/CodeGen/NVPTX/surf-read.ll
index e0cebd60d7dd09..cd11b5617076b1 100644
--- a/llvm/test/CodeGen/NVPTX/surf-read.ll
+++ b/llvm/test/CodeGen/NVPTX/surf-read.ll
@@ -6,7 +6,7 @@ target triple = "nvptx64-unknown-nvcl"
declare i32 @llvm.nvvm.suld.1d.i32.trap(i64, i32)
; CHECK: .entry foo
-define void @foo(i64 %img, ptr %red, i32 %idx) {
+define ptx_kernel void @foo(i64 %img, ptr %red, i32 %idx) {
; CHECK: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [foo_param_0, {%r{{[0-9]+}}}]
%val = tail call i32 @llvm.nvvm.suld.1d.i32.trap(i64 %img, i32 %idx)
; CHECK: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
@@ -16,6 +16,5 @@ define void @foo(i64 %img, ptr %red, i32 %idx) {
ret void
}
-!nvvm.annotations = !{!1, !2}
-!1 = !{ptr @foo, !"kernel", i32 1}
-!2 = !{ptr @foo, !"rdwrimage", i32 0}
+!nvvm.annotations = !{!1}
+!1 = !{ptr @foo, !"rdwrimage", i32 0}
diff --git a/llvm/test/CodeGen/NVPTX/surf-tex.py b/llvm/test/CodeGen/NVPTX/surf-tex.py
index 9607a58856bac8..90d67666f1ed62 100644
--- a/llvm/test/CodeGen/NVPTX/surf-tex.py
+++ b/llvm/test/CodeGen/NVPTX/surf-tex.py
@@ -224,11 +224,6 @@ def get_ptx_surface(target):
def get_surface_metadata(target, fun_ty, fun_name, has_surface_param):
metadata = []
- md_kernel = '!{{{fun_ty} @{fun_name}, !"kernel", i32 1}}'.format(
- fun_ty=fun_ty, fun_name=fun_name
- )
- metadata.append(md_kernel)
-
if target == "cuda":
# When a parameter is lowered as a .surfref, it still has the
# corresponding ld.param.u64, which is illegal. Do not emit the
@@ -263,14 +258,14 @@ def gen_suld_tests(target, global_surf):
; CHECK-LABEL: .entry ${test_name}_param
; CHECK: ${instruction} ${reg_ret}, [${reg_surf}, ${reg_access}]
;
- define void @${test_name}_param(i64 %s, ${retty}* %ret, ${access}) {
+ define ptx_kernel void @${test_name}_param(i64 %s, ${retty}* %ret, ${access}) {
%val = tail call ${retty} @${intrinsic}(i64 %s, ${access})
store ${retty} %val, ${retty}* %ret
ret void
}
; CHECK-LABEL: .entry ${test_name}_global
; CHECK: ${instruction} ${reg_ret}, [${global_surf}, ${reg_access}]
- define void @${test_name}_global(${retty}* %ret, ${access}) {
+ define ptx_kernel void @${test_name}_global(${retty}* %ret, ${access}) {
%gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_surf})
%val = tail call ${retty} @${intrinsic}(i64 %gs, ${access})
store ${retty} %val, ${retty}* %ret
@@ -356,13 +351,13 @@ def gen_sust_tests(target, global_surf):
; CHECK-LABEL: .entry ${test_name}_param
; CHECK: ${instruction} [${reg_surf}, ${reg_access}], ${reg_value}
;
- define void @${test_name}_param(i64 %s, ${value}, ${access}) {
+ define ptx_kernel void @${test_name}_param(i64 %s, ${value}, ${access}) {
tail call void @${intrinsic}(i64 %s, ${access}, ${value})
ret void
}
; CHECK-LABEL: .entry ${test_name}_global
; CHECK: ${instruction} [${global_surf}, ${reg_access}], ${reg_value}
- define void @${test_name}_global(${value}, ${access}) {
+ define ptx_kernel void @${test_name}_global(${value}, ${access}) {
%gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_surf})
tail call void @${intrinsic}(i64 %gs, ${access}, ${value})
ret void
@@ -420,19 +415,13 @@ def gen_sust_tests(target, global_surf):
generated_items.append((params["intrinsic"], params["instruction"]))
fun_name = test_name + "_param"
- fun_ty = "void (i64, {value_ty}, {access_ty})*".format(
- value_ty=get_llvm_value_type(vec, ctype),
- access_ty=get_llvm_surface_access_type(geom),
- )
+ fun_ty = "ptr"
generated_metadata += get_surface_metadata(
target, fun_ty, fun_name, has_surface_param=True
)
fun_name = test_name + "_global"
- fun_ty = "void ({value_ty}, {access_ty})*".format(
- value_ty=get_llvm_value_type(vec, ctype),
- access_ty=get_llvm_surface_access_type(geom),
- )
+ fun_ty = "ptr"
generated_metadata += get_surface_metadata(
target, fun_ty, fun_name, has_surface_param=False
)
@@ -559,11 +548,6 @@ def get_ptx_global_sampler(target, global_sampler):
def get_texture_metadata(target, fun_ty, fun_name, has_texture_params):
metadata = []
- md_kernel = '!{{{fun_ty} @{fun_name}, !"kernel", i32 1}}'.format(
- fun_ty=fun_ty, fun_name=fun_name
- )
- metadata.append(md_kernel)
-
if target == "cuda":
# When a parameter is lowered as a .texref, it still has the
# corresponding ld.param.u64, which is illegal. Do not emit the
@@ -615,14 +599,14 @@ def gen_tex_tests(target, global_tex, global_sampler):
; CHECK-LABEL: .entry ${test_name}_param
; CHECK: ${instruction} ${ptx_ret}, [${ptx_tex}, ${ptx_access}]
- define void @${test_name}_param(i64 %tex, ${sampler} ${retty}* %ret, ${access}) {
+ define ptx_kernel void @${test_name}_param(i64 %tex, ${sampler} ${retty}* %ret, ${access}) {
%val = tail call ${retty} @${intrinsic}(i64 %tex, ${sampler} ${access})
store ${retty} %val, ${retty}* %ret
ret void
}
; CHECK-LABEL: .entry ${test_name}_global
; CHECK: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
- define void @${test_name}_global(${retty}* %ret, ${access}) {
+ define ptx_kernel void @${test_name}_global(${retty}* %ret, ${access}) {
%gt = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_tex})
${get_sampler_handle}
%val = tail call ${retty} @${intrinsic}(i64 %gt, ${sampler} ${access})
@@ -799,14 +783,14 @@ def gen_tld4_tests(target, global_tex, global_sampler):
; CHECK-LABEL: .entry ${test_name}_param
; CHECK: ${instruction} ${ptx_ret}, [${ptx_tex}, ${ptx_access}]
- define void @${test_name}_param(i64 %tex, ${sampler} ${retty}* %ret, ${access}) {
+ define ptx_kernel void @${test_name}_param(i64 %tex, ${sampler} ${retty}* %ret, ${access}) {
%val = tail call ${retty} @${intrinsic}(i64 %tex, ${sampler} ${access})
store ${retty} %val, ${retty}* %ret
ret void
}
; CHECK-LABEL: .entry ${test_name}_global
; CHECK: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
- define void @${test_name}_global(${retty}* %ret, ${access}) {
+ define ptx_kernel void @${test_name}_global(${retty}* %ret, ${access}) {
%gt = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_tex})
${get_sampler_handle}
%val = tail call ${retty} @${intrinsic}(i64 %gt, ${sampler} ${access})
diff --git a/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll b/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll
index 881ea459feb489..5dc44cb1925b01 100644
--- a/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll
+++ b/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll
@@ -10,7 +10,7 @@ declare void @llvm.nvvm.sust.b.1d.i32.trap(i64, i32, i32)
declare i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1))
-define void @foo(i64 %img, i32 %val, i32 %idx) {
+define ptx_kernel void @foo(i64 %img, i32 %val, i32 %idx) {
; CHECK-LABEL: foo(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
@@ -30,7 +30,7 @@ define void @foo(i64 %img, i32 %val, i32 %idx) {
@surf0 = internal addrspace(1) global i64 0, align 8
-define void @bar(i32 %val, i32 %idx) {
+define ptx_kernel void @bar(i32 %val, i32 %idx) {
; CHECK-LABEL: bar(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
@@ -47,8 +47,6 @@ define void @bar(i32 %val, i32 %idx) {
}
-!nvvm.annotations = !{!1, !2, !3}
-!1 = !{ptr @foo, !"kernel", i32 1}
-!2 = !{ptr @bar, !"kernel", i32 1}
-!3 = !{ptr addrspace(1) @surf0, !"surface", i32 1}
+!nvvm.annotations = !{!1}
+!1 = !{ptr addrspace(1) @surf0, !"surface", i32 1}
diff --git a/llvm/test/CodeGen/NVPTX/surf-write.ll b/llvm/test/CodeGen/NVPTX/surf-write.ll
index 258bb6d8b5b71b..0e1f0cc7009939 100644
--- a/llvm/test/CodeGen/NVPTX/surf-write.ll
+++ b/llvm/test/CodeGen/NVPTX/surf-write.ll
@@ -6,12 +6,11 @@ target triple = "nvptx-unknown-nvcl"
declare void @llvm.nvvm.sust.b.1d.i32.trap(i64, i32, i32)
; CHECK: .entry foo
-define void @foo(i64 %img, i32 %val, i32 %idx) {
+define ptx_kernel void @foo(i64 %img, i32 %val, i32 %idx) {
; CHECK: sust.b.1d.b32.trap [foo_param_0, {%r{{[0-9]+}}}], {%r{{[0-9]+}}}
tail call void @llvm.nvvm.sust.b.1d.i32.trap(i64 %img, i32 %idx, i32 %val)
ret void
}
-!nvvm.annotations = !{!1, !2}
-!1 = !{ptr @foo, !"kernel", i32 1}
-!2 = !{ptr @foo, !"wroimage", i32 0}
+!nvvm.annotations = !{!1}
+!1 = !{ptr @foo, !"wroimage", i32 0}
diff --git a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
index ba556d2d9bd6bc..61837bde82ece1 100644
--- a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
+++ b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
@@ -10,7 +10,7 @@ target triple = "nvptx-unknown-cuda"
declare { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64, i32)
declare i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1))
-define void @foo(i64 %img, ptr %red, i32 %idx) {
+define ptx_kernel void @foo(i64 %img, ptr %red, i32 %idx) {
; CHECK-LABEL: foo(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
@@ -34,7 +34,7 @@ define void @foo(i64 %img, ptr %red, i32 %idx) {
@tex0 = internal addrspace(1) global i64 0, align 8
-define void @bar(ptr %red, i32 %idx) {
+define ptx_kernel void @bar(ptr %red, i32 %idx) {
; CHECK-LABEL: bar(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
@@ -57,7 +57,7 @@ define void @bar(ptr %red, i32 %idx) {
declare float @texfunc(i64)
-define void @baz(ptr %red, i32 %idx) {
+define ptx_kernel void @baz(ptr %red, i32 %idx) {
; CHECK-LABEL: baz(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
@@ -93,8 +93,5 @@ define void @baz(ptr %red, i32 %idx) {
ret void
}
-!nvvm.annotations = !{!1, !2, !3, !4}
-!1 = !{ptr @foo, !"kernel", i32 1}
-!2 = !{ptr @bar, !"kernel", i32 1}
-!3 = !{ptr addrspace(1) @tex0, !"texture", i32 1}
-!4 = !{ptr @baz, !"kernel", i32 1}
+!nvvm.annotations = !{!1}
+!1 = !{ptr addrspace(1) @tex0, !"texture", i32 1}
diff --git a/llvm/test/CodeGen/NVPTX/tex-read.ll b/llvm/test/CodeGen/NVPTX/tex-read.ll
index d11aea45a65f0e..d74c89f5abc8dc 100644
--- a/llvm/test/CodeGen/NVPTX/tex-read.ll
+++ b/llvm/test/CodeGen/NVPTX/tex-read.ll
@@ -6,7 +6,7 @@ target triple = "nvptx64-unknown-nvcl"
declare { float, float, float, float } @llvm.nvvm.tex.1d.v4f32.s32(i64, i64, i32)
; CHECK: .entry foo
-define void @foo(i64 %img, i64 %sampler, ptr %red, i32 %idx) {
+define ptx_kernel void @foo(i64 %img, i64 %sampler, ptr %red, i32 %idx) {
; CHECK: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [foo_param_0, foo_param_1, {%r{{[0-9]+}}}]
%val = tail call { float, float, float, float } @llvm.nvvm.tex.1d.v4f32.s32(i64 %img, i64 %sampler, i32 %idx)
%ret = extractvalue { float, float, float, float } %val, 0
@@ -15,7 +15,6 @@ define void @foo(i64 %img, i64 %sampler, ptr %red, i32 %idx) {
ret void
}
-!nvvm.annotations = !{!1, !2, !3}
-!1 = !{ptr @foo, !"kernel", i32 1}
+!nvvm.annotations = !{!2, !3}
!2 = !{ptr @foo, !"rdoimage", i32 0}
!3 = !{ptr @foo, !"sampler", i32 1}
diff --git a/llvm/test/CodeGen/NVPTX/unreachable.ll b/llvm/test/CodeGen/NVPTX/unreachable.ll
index 286f3588a754f1..80cf938d48b535 100644
--- a/llvm/test/CodeGen/NVPTX/unreachable.ll
+++ b/llvm/test/CodeGen/NVPTX/unreachable.ll
@@ -21,7 +21,7 @@ target triple = "nvptx-unknown-cuda"
declare void @throw() #0
declare void @llvm.trap() #0
-define void @kernel_func() {
+define ptx_kernel void @kernel_func() {
; NO-TRAP-UNREACHABLE-LABEL: kernel_func(
; NO-TRAP-UNREACHABLE: {
; NO-TRAP-UNREACHABLE-EMPTY:
@@ -102,6 +102,3 @@ define void @kernel_func_2() {
}
attributes #0 = { noreturn }
-
-!nvvm.annotations = !{!1}
-!1 = !{ptr @kernel_func, !"kernel", i32 1}
diff --git a/llvm/test/DebugInfo/NVPTX/debug-addr-class.ll b/llvm/test/DebugInfo/NVPTX/debug-addr-class.ll
index 26ad59723abf01..82301e42f7d067 100644
--- a/llvm/test/DebugInfo/NVPTX/debug-addr-class.ll
+++ b/llvm/test/DebugInfo/NVPTX/debug-addr-class.ll
@@ -4,7 +4,7 @@
@GLOBAL = addrspace(1) externally_initialized global i32 0, align 4, !dbg !0
@SHARED = addrspace(3) externally_initialized global i32 undef, align 4, !dbg !6
-define void @test(float, ptr, ptr, i32) !dbg !17 {
+define ptx_kernel void @test(float, ptr, ptr, i32) !dbg !17 {
%5 = alloca float, align 4
%6 = alloca ptr, align 8
%7 = alloca ptr, align 8
@@ -38,7 +38,6 @@ define void @test(float, ptr, ptr, i32) !dbg !17 {
declare void @llvm.dbg.declare(metadata, metadata, metadata)
!llvm.dbg.cu = !{!2}
-!nvvm.annotations = !{!10}
!llvm.module.flags = !{!11, !12, !13, !14, !15}
!llvm.ident = !{!16}
@@ -52,7 +51,6 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata)
!7 = distinct !DIGlobalVariable(name: "SHARED", scope: !2, file: !8, line: 4, type: !9, isLocal: false, isDefinition: true)
!8 = !DIFile(filename: "test.cu", directory: "/tmp")
!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
-!10 = !{ptr @test, !"kernel", i32 1}
!11 = !{i32 2, !"Dwarf Version", i32 2}
!12 = !{i32 2, !"Debug Info Version", i32 3}
!13 = !{i32 1, !"wchar_size", i32 4}
diff --git a/llvm/test/DebugInfo/NVPTX/debug-info.ll b/llvm/test/DebugInfo/NVPTX/debug-info.ll
index 55c81caaed0560..c926229f96e38b 100644
--- a/llvm/test/DebugInfo/NVPTX/debug-info.ll
+++ b/llvm/test/DebugInfo/NVPTX/debug-info.ll
@@ -59,7 +59,7 @@
; CHECK: }
; Function Attrs: nounwind
-define void @_Z5saxpyifPfS_(i32 %n, float %a, ptr nocapture readonly %x, ptr nocapture %y) local_unnamed_addr #0 !dbg !566 {
+define ptx_kernel void @_Z5saxpyifPfS_(i32 %n, float %a, ptr nocapture readonly %x, ptr nocapture %y) local_unnamed_addr #0 !dbg !566 {
entry:
call void @llvm.dbg.value(metadata i32 %n, metadata !570, metadata !DIExpression()), !dbg !575
call void @llvm.dbg.value(metadata float %a, metadata !571, metadata !DIExpression()), !dbg !576
@@ -8496,7 +8496,6 @@ attributes #2 = { nounwind readnone speculatable }
attributes #3 = { nounwind }
!llvm.dbg.cu = !{!0}
-!nvvm.annotations = !{!555, !556, !557, !556, !558, !558, !558, !558, !559, !559, !558}
!llvm.module.flags = !{!560, !561, !562, !563}
!llvm.ident = !{!564}
!nvvm.internalize.after.link = !{}
@@ -9057,11 +9056,6 @@ attributes #3 = { nounwind }
!552 = !DISubprogram(name: "tgammaf", linkageName: "_ZL7tgammaff", scope: !444, file: !444, line: 1592, type: !13, isLocal: true, isDefinition: false, flags: DIFlagPrototyped, isOptimized: true)
!553 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !5, entity: !554, file: !445, line: 459)
!554 = !DISubprogram(name: "truncf", linkageName: "_ZL6truncff", scope: !462, file: !462, line: 662, type: !13, isLocal: true, isDefinition: false, flags: DIFlagPrototyped, isOptimized: true)
-!555 = !{ptr @_Z5saxpyifPfS_, !"kernel", i32 1}
-!556 = !{null, !"align", i32 8}
-!557 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080}
-!558 = !{null, !"align", i32 16}
-!559 = !{null, !"align", i32 16, !"align", i32 65552, !"align", i32 131088}
!560 = !{i32 2, !"Dwarf Version", i32 2}
!561 = !{i32 2, !"Debug Info Version", i32 3}
!562 = !{i32 1, !"wchar_size", i32 4}
diff --git a/llvm/test/Transforms/LoopStrengthReduce/NVPTX/trunc.ll b/llvm/test/Transforms/LoopStrengthReduce/NVPTX/trunc.ll
index 8761122f756fc2..e6b5991d8dfb34 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/NVPTX/trunc.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/NVPTX/trunc.ll
@@ -13,7 +13,7 @@ target triple = "nvptx64-nvidia-cuda"
; That would be worthless, because "i" is simulated by two 32-bit registers and
; truncating it to 32-bit is as simple as directly using the register that
; contains the low bits.
-define void @trunc_is_free(i64 %begin, i64 %stride, i64 %end) {
+define ptx_kernel void @trunc_is_free(i64 %begin, i64 %stride, i64 %end) {
; CHECK-LABEL: @trunc_is_free(
entry:
%cmp.4 = icmp eq i64 %begin, %end
@@ -41,5 +41,3 @@ for.body: ; preds = %for.body.preheader,
declare void @_Z3usei(i32)
-!nvvm.annotations = !{!0}
-!0 = !{ptr @trunc_is_free, !"kernel", i32 1}
diff --git a/llvm/test/Transforms/StraightLineStrengthReduce/NVPTX/speculative-slsr.ll b/llvm/test/Transforms/StraightLineStrengthReduce/NVPTX/speculative-slsr.ll
index 92766d5a11aa5d..420e844b51039a 100644
--- a/llvm/test/Transforms/StraightLineStrengthReduce/NVPTX/speculative-slsr.ll
+++ b/llvm/test/Transforms/StraightLineStrengthReduce/NVPTX/speculative-slsr.ll
@@ -11,7 +11,7 @@ target triple = "nvptx64-nvidia-cuda"
; use((b + i) * s);
; }
; }
-define void @foo(i32 %b, i32 %s) {
+define ptx_kernel void @foo(i32 %b, i32 %s) {
; CHECK-LABEL: .visible .entry foo(
entry:
; CHECK: ld.param.u32 [[s:%r[0-9]+]], [foo_param_1];
@@ -65,7 +65,3 @@ for.inc.3: ; preds = %if.then.3, %for.inc
declare zeroext i1 @cond(i32)
declare void @use(i32)
-
-!nvvm.annotations = !{!0}
-
-!0 = !{ptr @foo, !"kernel", i32 1}
diff --git a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
index cf58bc5d8f475a..659ab1227f1137 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
@@ -237,15 +237,7 @@ class NVVMDialectLLVMIRTranslationInterface
generateMetadata(value.getInt(), "maxnreg");
} else if (attribute.getName() ==
NVVM::NVVMDialect::getKernelFuncAttrName()) {
- llvm::Metadata *llvmMetadataKernel[] = {
- llvm::ValueAsMetadata::get(llvmFunc),
- llvm::MDString::get(llvmContext, "kernel"),
- llvm::ValueAsMetadata::get(
- llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext), 1))};
- llvm::MDNode *llvmMetadataNode =
- llvm::MDNode::get(llvmContext, llvmMetadataKernel);
- moduleTranslation.getOrInsertNamedModuleMetadata("nvvm.annotations")
- ->addOperand(llvmMetadataNode);
+ llvmFunc->setCallingConv(llvm::CallingConv::PTX_Kernel);
}
return success();
}
diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
index 6a32190694b470..ea41440c7f9ddc 100644
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -556,9 +556,7 @@ llvm.func @kernel_func() attributes {nvvm.kernel} {
llvm.return
}
-// CHECK: !nvvm.annotations =
-// CHECK-NOT: {ptr @nvvm_special_regs, !"kernel", i32 1}
-// CHECK: {ptr @kernel_func, !"kernel", i32 1}
+// CHECK: ptx_kernel void @kernel_func
// -----
@@ -566,9 +564,8 @@ llvm.func @kernel_func() attributes {nvvm.kernel, nvvm.maxntid = array<i32: 1, 2
llvm.return
}
+// CHECK: define ptx_kernel void @kernel_func
// CHECK: !nvvm.annotations =
-// CHECK-NOT: {ptr @nvvm_special_regs, !"kernel", i32 1}
-// CHECK: {ptr @kernel_func, !"kernel", i32 1}
// CHECK: {ptr @kernel_func, !"maxntidx", i32 1}
// CHECK: {ptr @kernel_func, !"maxntidy", i32 23}
// CHECK: {ptr @kernel_func, !"maxntidz", i32 32}
@@ -578,9 +575,8 @@ llvm.func @kernel_func() attributes {nvvm.kernel, nvvm.reqntid = array<i32: 1, 2
llvm.return
}
+// CHECK: define ptx_kernel void @kernel_func
// CHECK: !nvvm.annotations =
-// CHECK-NOT: {ptr @nvvm_special_regs, !"kernel", i32 1}
-// CHECK: {ptr @kernel_func, !"kernel", i32 1}
// CHECK: {ptr @kernel_func, !"reqntidx", i32 1}
// CHECK: {ptr @kernel_func, !"reqntidy", i32 23}
// CHECK: {ptr @kernel_func, !"reqntidz", i32 32}
@@ -590,31 +586,28 @@ llvm.func @kernel_func() attributes {nvvm.kernel, nvvm.cluster_dim = array<i32:
llvm.return
}
+// CHECK: define ptx_kernel void @kernel_func
// CHECK: !nvvm.annotations =
-// CHECK-NOT: {ptr @nvvm_special_regs, !"kernel", i32 1}
// CHECK: {ptr @kernel_func, !"cluster_dim_x", i32 3}
// CHECK: {ptr @kernel_func, !"cluster_dim_y", i32 5}
// CHECK: {ptr @kernel_func, !"cluster_dim_z", i32 7}
-// CHECK: {ptr @kernel_func, !"kernel", i32 1}
// -----
llvm.func @kernel_func() attributes {nvvm.kernel, nvvm.cluster_max_blocks = 8} {
llvm.return
}
+// CHECK: define ptx_kernel void @kernel_func
// CHECK: !nvvm.annotations =
-// CHECK-NOT: {ptr @nvvm_special_regs, !"kernel", i32 1}
// CHECK: {ptr @kernel_func, !"cluster_max_blocks", i32 8}
-// CHECK: {ptr @kernel_func, !"kernel", i32 1}
// -----
llvm.func @kernel_func() attributes {nvvm.kernel, nvvm.minctasm = 16} {
llvm.return
}
+// CHECK: define ptx_kernel void @kernel_func
// CHECK: !nvvm.annotations =
-// CHECK-NOT: {ptr @nvvm_special_regs, !"kernel", i32 1}
-// CHECK: {ptr @kernel_func, !"kernel", i32 1}
// CHECK: {ptr @kernel_func, !"minctasm", i32 16}
// -----
@@ -622,9 +615,8 @@ llvm.func @kernel_func() attributes {nvvm.kernel, nvvm.maxnreg = 16} {
llvm.return
}
+// CHECK: define ptx_kernel void @kernel_func
// CHECK: !nvvm.annotations =
-// CHECK-NOT: {ptr @nvvm_special_regs, !"kernel", i32 1}
-// CHECK: {ptr @kernel_func, !"kernel", i32 1}
// CHECK: {ptr @kernel_func, !"maxnreg", i32 16}
// -----
@@ -633,9 +625,8 @@ llvm.func @kernel_func() attributes {nvvm.kernel, nvvm.maxntid = array<i32: 1, 2
llvm.return
}
+// CHECK: define ptx_kernel void @kernel_func
// CHECK: !nvvm.annotations =
-// CHECK-NOT: {ptr @nvvm_special_regs, !"kernel", i32 1}
-// CHECK: {ptr @kernel_func, !"kernel", i32 1}
// CHECK: {ptr @kernel_func, !"maxnreg", i32 32}
// CHECK: {ptr @kernel_func, !"maxntidx", i32 1}
// CHECK: {ptr @kernel_func, !"maxntidy", i32 23}
@@ -643,19 +634,19 @@ llvm.func @kernel_func() attributes {nvvm.kernel, nvvm.maxntid = array<i32: 1, 2
// CHECK: {ptr @kernel_func, !"minctasm", i32 16}
// -----
+// CHECK: define ptx_kernel void @kernel_func
// CHECK: !nvvm.annotations =
// CHECK: !1 = !{ptr @kernel_func, !"grid_constant", !2}
// CHECK: !2 = !{i32 1}
-// CHECK: !3 = !{ptr @kernel_func, !"kernel", i32 1}
llvm.func @kernel_func(%arg0: !llvm.ptr {llvm.byval = i32, nvvm.grid_constant}) attributes {nvvm.kernel} {
llvm.return
}
// -----
+// CHECK: define ptx_kernel void @kernel_func
// CHECK: !nvvm.annotations =
// CHECK: !1 = !{ptr @kernel_func, !"grid_constant", !2}
// CHECK: !2 = !{i32 1, i32 3}
-// CHECK: !3 = !{ptr @kernel_func, !"kernel", i32 1}
llvm.func @kernel_func(%arg0: !llvm.ptr {llvm.byval = i32, nvvm.grid_constant}, %arg1: f32, %arg2: !llvm.ptr {llvm.byval = f32, nvvm.grid_constant}) attributes {nvvm.kernel} {
llvm.return
}
More information about the Mlir-commits
mailing list