[clang] [llvm] [mlir] [NVPTX] Auto-upgrade nvvm.grid_constant to param attribute (PR #155489)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 26 13:14:49 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: Alex MacLean (AlexMaclean)
<details>
<summary>Changes</summary>
Upgrade the !"grid_constant" !nvvm.annotation to a "nvvm.grid_constant" attribute. This attribute is much simpler for front-ends to apply and faster and simpler to query.
---
Patch is 34.40 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/155489.diff
9 Files Affected:
- (modified) clang/lib/CodeGen/Targets/NVPTX.cpp (+6-36)
- (modified) clang/test/CodeGenCUDA/grid-constant.cu (+6-10)
- (modified) llvm/docs/NVPTXUsage.rst (+19-40)
- (modified) llvm/lib/IR/AutoUpgrade.cpp (+10)
- (modified) llvm/lib/Target/NVPTX/NVPTXUtilities.cpp (+4-28)
- (modified) llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll (+31-73)
- (modified) llvm/test/CodeGen/NVPTX/upgrade-nvvm-annotations.ll (+11-2)
- (modified) mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp (+2-43)
- (modified) mlir/test/Target/LLVMIR/nvvmir.mlir (+2-8)
``````````diff
diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp
index e874617796f86..78790daa1874a 100644
--- a/clang/lib/CodeGen/Targets/NVPTX.cpp
+++ b/clang/lib/CodeGen/Targets/NVPTX.cpp
@@ -87,10 +87,6 @@ class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
int Operand);
- static void
- addGridConstantNVVMMetadata(llvm::GlobalValue *GV,
- const SmallVectorImpl<int> &GridConstantArgs);
-
private:
static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
LValue Src) {
@@ -266,27 +262,24 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
// By default, all functions are device functions
if (FD->hasAttr<DeviceKernelAttr>() || FD->hasAttr<CUDAGlobalAttr>()) {
// OpenCL/CUDA kernel functions get kernel metadata
- // Create !{<func-ref>, metadata !"kernel", i32 1} node
// And kernel functions are not subject to inlining
F->addFnAttr(llvm::Attribute::NoInline);
if (FD->hasAttr<CUDAGlobalAttr>()) {
- SmallVector<int, 10> GCI;
+ F->setCallingConv(llvm::CallingConv::PTX_Kernel);
+
for (auto IV : llvm::enumerate(FD->parameters()))
if (IV.value()->hasAttr<CUDAGridConstantAttr>())
- // For some reason arg indices are 1-based in NVVM
- GCI.push_back(IV.index() + 1);
- // Create !{<func-ref>, metadata !"kernel", i32 1} node
- F->setCallingConv(llvm::CallingConv::PTX_Kernel);
- addGridConstantNVVMMetadata(F, GCI);
+ F->addParamAttr(
+ IV.index(),
+ llvm::Attribute::get(F->getContext(), "nvvm.grid_constant"));
}
if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
M.handleCUDALaunchBoundsAttr(F, Attr);
}
}
// Attach kernel metadata directly if compiling for NVPTX.
- if (FD->hasAttr<DeviceKernelAttr>()) {
+ if (FD->hasAttr<DeviceKernelAttr>())
F->setCallingConv(llvm::CallingConv::PTX_Kernel);
- }
}
void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
@@ -306,29 +299,6 @@ void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}
-void NVPTXTargetCodeGenInfo::addGridConstantNVVMMetadata(
- llvm::GlobalValue *GV, const SmallVectorImpl<int> &GridConstantArgs) {
-
- llvm::Module *M = GV->getParent();
- llvm::LLVMContext &Ctx = M->getContext();
-
- // Get "nvvm.annotations" metadata node
- llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
-
- SmallVector<llvm::Metadata *, 5> MDVals = {llvm::ConstantAsMetadata::get(GV)};
- if (!GridConstantArgs.empty()) {
- SmallVector<llvm::Metadata *, 10> GCM;
- for (int I : GridConstantArgs)
- GCM.push_back(llvm::ConstantAsMetadata::get(
- llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), I)));
- MDVals.append({llvm::MDString::get(Ctx, "grid_constant"),
- llvm::MDNode::get(Ctx, GCM)});
- }
-
- // Append metadata to nvvm.annotations
- MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
-}
-
bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
return false;
}
diff --git a/clang/test/CodeGenCUDA/grid-constant.cu b/clang/test/CodeGenCUDA/grid-constant.cu
index e7000cab3cda5..120b854e56746 100644
--- a/clang/test/CodeGenCUDA/grid-constant.cu
+++ b/clang/test/CodeGenCUDA/grid-constant.cu
@@ -19,13 +19,9 @@ void foo() {
tkernel_const<S><<<1,1>>>({});
tkernel<const S><<<1,1>>>(1, {});
}
-//.
-//.
-// CHECK: [[META0:![0-9]+]] = !{ptr @_Z6kernel1Sii, !"grid_constant", [[META1:![0-9]+]]}
-// CHECK: [[META1]] = !{i32 1, i32 3}
-// CHECK: [[META2:![0-9]+]] = !{ptr @_Z13tkernel_constIK1SEvT_, !"grid_constant", [[META3:![0-9]+]]}
-// CHECK: [[META3]] = !{i32 1}
-// CHECK: [[META4:![0-9]+]] = !{ptr @_Z13tkernel_constI1SEvT_, !"grid_constant", [[META3]]}
-// CHECK: [[META5:![0-9]+]] = !{ptr @_Z7tkernelIK1SEviT_, !"grid_constant", [[META6:![0-9]+]]}
-// CHECK: [[META6]] = !{i32 2}
-//.
+
+// CHECK: define dso_local ptx_kernel void @_Z6kernel1Sii(ptr noundef byval(%struct.S) align 1 "nvvm.grid_constant" %gc_arg1, i32 noundef %arg2, i32 noundef "nvvm.grid_constant" %gc_arg3)
+// CHECK: define ptx_kernel void @_Z13tkernel_constIK1SEvT_(ptr noundef byval(%struct.S) align 1 "nvvm.grid_constant" %arg)
+// CHECK: define ptx_kernel void @_Z13tkernel_constI1SEvT_(ptr noundef byval(%struct.S) align 1 "nvvm.grid_constant" %arg)
+// CHECK: define ptx_kernel void @_Z7tkernelIK1SEviT_(i32 noundef %dummy, ptr noundef byval(%struct.S) align 1 "nvvm.grid_constant" %arg)
+
diff --git a/llvm/docs/NVPTXUsage.rst b/llvm/docs/NVPTXUsage.rst
index 629bf2ea5afb4..4c8c605edfdd6 100644
--- a/llvm/docs/NVPTXUsage.rst
+++ b/llvm/docs/NVPTXUsage.rst
@@ -57,6 +57,19 @@ not.
When compiled, the PTX kernel functions are callable by host-side code.
+
+Parameter Attributes
+--------------------
+
+``"nvvm.grid_constant"``
+ This attribute may be attached to a ``byval`` parameter of a kernel function
+ to indicate that the parameter should be lowered as a direct reference to
+ the grid-constant memory of the parameter, as opposed to a copy of the
+ parameter in local memory. Writing to a grid-constant parameter is
+ undefined behavior. Unlike a normal ``byval`` parameter, the address of a
+ grid-constant parameter is not unique to a given function invocation but
+ instead is shared by all kernels in the grid.
+
.. _nvptx_fnattrs:
Function Attributes
@@ -2289,9 +2302,9 @@ The Kernel
; Intrinsic to read X component of thread ID
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() readnone nounwind
- define void @kernel(ptr addrspace(1) %A,
- ptr addrspace(1) %B,
- ptr addrspace(1) %C) {
+ define ptx_kernel void @kernel(ptr addrspace(1) %A,
+ ptr addrspace(1) %B,
+ ptr addrspace(1) %C) {
entry:
; What is my ID?
%id = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x() readnone nounwind
@@ -2314,9 +2327,6 @@ The Kernel
ret void
}
- !nvvm.annotations = !{!0}
- !0 = !{ptr @kernel, !"kernel", i32 1}
-
We can use the LLVM ``llc`` tool to directly run the NVPTX code generator:
@@ -2442,34 +2452,6 @@ and non-generic address spaces.
See :ref:`address_spaces` and :ref:`nvptx_intrinsics` for more information.
-Kernel Metadata
-^^^^^^^^^^^^^^^
-
-In PTX, a function can be either a `kernel` function (callable from the host
-program), or a `device` function (callable only from GPU code). You can think
-of `kernel` functions as entry-points in the GPU program. To mark an LLVM IR
-function as a `kernel` function, we make use of special LLVM metadata. The
-NVPTX back-end will look for a named metadata node called
-``nvvm.annotations``. This named metadata must contain a list of metadata that
-describe the IR. For our purposes, we need to declare a metadata node that
-assigns the "kernel" attribute to the LLVM IR function that should be emitted
-as a PTX `kernel` function. These metadata nodes take the form:
-
-.. code-block:: text
-
- !{<function ref>, metadata !"kernel", i32 1}
-
-For the previous example, we have:
-
-.. code-block:: llvm
-
- !nvvm.annotations = !{!0}
- !0 = !{ptr @kernel, !"kernel", i32 1}
-
-Here, we have a single metadata declaration in ``nvvm.annotations``. This
-metadata annotates our ``@kernel`` function with the ``kernel`` attribute.
-
-
Running the Kernel
------------------
@@ -2669,9 +2651,9 @@ Libdevice provides an ``__nv_powf`` function that we will use.
; libdevice function
declare float @__nv_powf(float, float)
- define void @kernel(ptr addrspace(1) %A,
- ptr addrspace(1) %B,
- ptr addrspace(1) %C) {
+ define ptx_kernel void @kernel(ptr addrspace(1) %A,
+ ptr addrspace(1) %B,
+ ptr addrspace(1) %C) {
entry:
; What is my ID?
%id = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x() readnone nounwind
@@ -2694,9 +2676,6 @@ Libdevice provides an ``__nv_powf`` function that we will use.
ret void
}
- !nvvm.annotations = !{!0}
- !0 = !{ptr @kernel, !"kernel", i32 1}
-
To compile this kernel, we perform the following steps:
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index e200f3626e69d..7ea9c6dff13b8 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -5381,6 +5381,16 @@ bool static upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K,
upgradeNVVMFnVectorAttr("nvvm.cluster_dim", K[0], GV, V);
return true;
}
+ if (K == "grid_constant") {
+ const auto Attr = Attribute::get(GV->getContext(), "nvvm.grid_constant");
+ for (const auto &Op : cast<MDNode>(V)->operands()) {
+ // For some reason, the index is 1-based in the metadata. Good thing we're
+ // able to auto-upgrade it!
+ const auto Index = mdconst::extract<ConstantInt>(Op)->getZExtValue() - 1;
+ cast<Function>(GV)->addParamAttr(Index, Attr);
+ }
+ return true;
+ }
return false;
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
index 274b04fdd30b5..8e97b422218f7 100644
--- a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -55,15 +55,6 @@ void clearAnnotationCache(const Module *Mod) {
AC.Cache.erase(Mod);
}
-static void readIntVecFromMDNode(const MDNode *MetadataNode,
- std::vector<unsigned> &Vec) {
- for (unsigned i = 0, e = MetadataNode->getNumOperands(); i != e; ++i) {
- ConstantInt *Val =
- mdconst::extract<ConstantInt>(MetadataNode->getOperand(i));
- Vec.push_back(Val->getZExtValue());
- }
-}
-
static void cacheAnnotationFromMD(const MDNode *MetadataNode,
key_val_pair_t &retval) {
auto &AC = getAnnotationCache();
@@ -83,19 +74,8 @@ static void cacheAnnotationFromMD(const MDNode *MetadataNode,
if (ConstantInt *Val = mdconst::dyn_extract<ConstantInt>(
MetadataNode->getOperand(i + 1))) {
retval[Key].push_back(Val->getZExtValue());
- } else if (MDNode *VecMd =
- dyn_cast<MDNode>(MetadataNode->getOperand(i + 1))) {
- // note: only "grid_constant" annotations support vector MDNodes.
- // assert: there can only exist one unique key value pair of
- // the form (string key, MDNode node). Operands of such a node
- // shall always be unsigned ints.
- auto [It, Inserted] = retval.try_emplace(Key);
- if (Inserted) {
- readIntVecFromMDNode(VecMd, It->second);
- continue;
- }
} else {
- llvm_unreachable("Value operand not a constant int or an mdnode");
+ llvm_unreachable("Value operand not a constant int");
}
}
}
@@ -179,16 +159,13 @@ static bool globalHasNVVMAnnotation(const Value &V, const std::string &Prop) {
}
static bool argHasNVVMAnnotation(const Value &Val,
- const std::string &Annotation,
- const bool StartArgIndexAtOne = false) {
+ const std::string &Annotation) {
if (const Argument *Arg = dyn_cast<Argument>(&Val)) {
const Function *Func = Arg->getParent();
std::vector<unsigned> Annot;
if (findAllNVVMAnnotation(Func, Annotation, Annot)) {
- const unsigned BaseOffset = StartArgIndexAtOne ? 1 : 0;
- if (is_contained(Annot, BaseOffset + Arg->getArgNo())) {
+ if (is_contained(Annot, Arg->getArgNo()))
return true;
- }
}
}
return false;
@@ -250,8 +227,7 @@ bool isParamGridConstant(const Argument &Arg) {
}
// "grid_constant" counts argument indices starting from 1
- if (argHasNVVMAnnotation(Arg, "grid_constant",
- /*StartArgIndexAtOne*/ true))
+ if (Arg.hasAttribute("nvvm.grid_constant"))
return true;
return false;
diff --git a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
index 8adde4ceefbf4..01ab47145940c 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
@@ -49,14 +49,14 @@ define dso_local noundef i32 @non_kernel_function(ptr nocapture noundef readonly
; PTX-NEXT: st.param.b32 [func_retval0], %r10;
; PTX-NEXT: ret;
entry:
- %a. = select i1 %b, ptr %a, ptr addrspacecast (ptr addrspace(1) @gi to ptr), !dbg !17
- %idx.ext = sext i32 %c to i64, !dbg !18
- %add.ptr = getelementptr inbounds i8, ptr %a., i64 %idx.ext, !dbg !18
- %0 = load i32, ptr %add.ptr, align 1, !dbg !19
- ret i32 %0, !dbg !23
+ %a. = select i1 %b, ptr %a, ptr addrspacecast (ptr addrspace(1) @gi to ptr)
+ %idx.ext = sext i32 %c to i64
+ %add.ptr = getelementptr inbounds i8, ptr %a., i64 %idx.ext
+ %0 = load i32, ptr %add.ptr, align 1
+ ret i32 %0
}
-define ptx_kernel void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %input2, ptr %out, i32 %n) {
+define ptx_kernel void @grid_const_int(ptr byval(i32) align 4 "nvvm.grid_constant" %input1, i32 %input2, ptr %out, i32 %n) {
; PTX-LABEL: grid_const_int(
; PTX: {
; PTX-NEXT: .reg .b32 %r<4>;
@@ -71,7 +71,7 @@ define ptx_kernel void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %inpu
; PTX-NEXT: st.global.b32 [%rd2], %r3;
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_int(
-; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], i32 [[INPUT2:%.*]], ptr [[OUT:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; OPT-SAME: ptr byval(i32) align 4 "nvvm.grid_constant" [[INPUT1:%.*]], i32 [[INPUT2:%.*]], ptr [[OUT:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
; OPT-NEXT: [[INPUT11:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
; OPT-NEXT: [[TMP:%.*]] = load i32, ptr addrspace(101) [[INPUT11]], align 4
; OPT-NEXT: [[ADD:%.*]] = add i32 [[TMP]], [[INPUT2]]
@@ -85,7 +85,7 @@ define ptx_kernel void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %inpu
%struct.s = type { i32, i32 }
-define ptx_kernel void @grid_const_struct(ptr byval(%struct.s) align 4 %input, ptr %out){
+define ptx_kernel void @grid_const_struct(ptr byval(%struct.s) align 4 "nvvm.grid_constant" %input, ptr %out){
; PTX-LABEL: grid_const_struct(
; PTX: {
; PTX-NEXT: .reg .b32 %r<4>;
@@ -100,7 +100,7 @@ define ptx_kernel void @grid_const_struct(ptr byval(%struct.s) align 4 %input, p
; PTX-NEXT: st.global.b32 [%rd2], %r3;
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_struct(
-; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[OUT:%.*]]) #[[ATTR0]] {
+; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 "nvvm.grid_constant" [[INPUT:%.*]], ptr [[OUT:%.*]]) #[[ATTR0]] {
; OPT-NEXT: [[INPUT1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
; OPT-NEXT: [[GEP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 0
; OPT-NEXT: [[GEP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 1
@@ -118,7 +118,7 @@ define ptx_kernel void @grid_const_struct(ptr byval(%struct.s) align 4 %input, p
ret void
}
-define ptx_kernel void @grid_const_escape(ptr byval(%struct.s) align 4 %input) {
+define ptx_kernel void @grid_const_escape(ptr byval(%struct.s) align 4 "nvvm.grid_constant" %input) {
; PTX-LABEL: grid_const_escape(
; PTX: {
; PTX-NEXT: .reg .b64 %rd<4>;
@@ -136,7 +136,7 @@ define ptx_kernel void @grid_const_escape(ptr byval(%struct.s) align 4 %input) {
; PTX-NEXT: } // callseq 0
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_escape(
-; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]]) #[[ATTR0]] {
+; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 "nvvm.grid_constant" [[INPUT:%.*]]) #[[ATTR0]] {
; OPT-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
; OPT-NEXT: [[INPUT_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
; OPT-NEXT: [[CALL:%.*]] = call i32 @escape(ptr [[INPUT_PARAM_GEN]])
@@ -145,7 +145,7 @@ define ptx_kernel void @grid_const_escape(ptr byval(%struct.s) align 4 %input) {
ret void
}
-define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4 %input, i32 %a, ptr byval(i32) align 4 %b) {
+define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4 "nvvm.grid_constant" %input, i32 %a, ptr byval(i32) align 4 "nvvm.grid_constant" %b) {
; PTX-LABEL: multiple_grid_const_escape(
; PTX: {
; PTX-NEXT: .local .align 4 .b8 __local_depot4[4];
@@ -179,7 +179,7 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4
; PTX-NEXT: } // callseq 1
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @multiple_grid_const_escape(
-; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], i32 [[A:%.*]], ptr byval(i32) align 4 [[B:%.*]]) #[[ATTR0]] {
+; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 "nvvm.grid_constant" [[INPUT:%.*]], i32 [[A:%.*]], ptr byval(i32) align 4 "nvvm.grid_constant" [[B:%.*]]) #[[ATTR0]] {
; OPT-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[B]])
; OPT-NEXT: [[B_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
; OPT-NEXT: [[TMP2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
@@ -194,7 +194,7 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4
ret void
}
-define ptx_kernel void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %input, ptr %addr) {
+define ptx_kernel void @grid_const_memory_escape(ptr byval(%struct.s) align 4 "nvvm.grid_constant" %input, ptr %addr) {
; PTX-LABEL: grid_const_memory_escape(
; PTX: {
; PTX-NEXT: .reg .b64 %rd<5>;
@@ -207,7 +207,7 @@ define ptx_kernel void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %i
; PTX-NEXT: st.global.b64 [%rd3], %rd4;
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_memory_escape(
-; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[ADDR:%.*]]) #[[ATTR0]] {
+; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 "nvvm.grid_constant" [[INPUT:%.*]], ptr [[ADDR:%.*]]) #[[ATTR0]] {
; OPT-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
; OPT-NEXT: [[INPUT1:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
; OPT-NEXT: store ptr [[INPUT1]], ptr [[ADDR]], align 8
@@ -216,7 +216,7 @@ define ptx_kernel void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %i
ret void
}
-define ptx_kernel void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4 %input, ptr %result) {
+define ptx_kernel void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4 "nvvm.grid_constant" %input, ptr %result) {
; PTX-LABEL: grid_const_inlineasm_escape(
; PTX: {
; PTX-NEXT: .reg .b64 %rd<7>;
@@ -234,7 +234,7 @@ define ptx_kernel void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4
; PTX-NEXT: ret;
; PTX-NOT .local
; OPT-LABEL: define ptx_kernel void @grid_const_inlineasm_escape(
-; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[RESULT:%.*]]) #[[ATTR0]] {
+; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 "nvvm.grid_constant" [[INPUT:%.*]], ptr [[RESULT:%.*]]) #[[ATTR0]] {
; OPT-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
; OPT-NEXT: [[INPUT1:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
; OPT-NEXT: [[TMPPTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 0
@@ -249,7 +249,7 @@ define ptx_kernel void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4
ret void
}
-define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %output) {
+define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) "nvvm.grid_constant" %input, ptr %output) {
; PTX-LABEL: grid_const_partial_...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/155489
More information about the llvm-commits
mailing list