[Mlir-commits] [clang] [llvm] [mlir] [NVPTX] Convert scalar function nvvm.annotations to attributes (PR #125908)
Alex MacLean
llvmlistbot at llvm.org
Wed Feb 5 15:47:04 PST 2025
https://github.com/AlexMaclean updated https://github.com/llvm/llvm-project/pull/125908
>From 12bdf8bfa72b10d1e8ccc305cd57c337f2799e52 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Wed, 5 Feb 2025 18:46:03 +0000
Subject: [PATCH 1/2] [NVPTX] Convert scalar function nvvm.annotations to
attributes
---
clang/lib/CodeGen/Targets/NVPTX.cpp | 15 ++---
clang/test/CodeGenCUDA/launch-bounds.cu | 32 ++++++----
llvm/docs/NVPTXUsage.rst | 37 +++++++----
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 2 +-
llvm/lib/IR/AutoUpgrade.cpp | 16 +++++
.../Target/NVPTX/NVPTXCtorDtorLowering.cpp | 9 +--
llvm/lib/Target/NVPTX/NVPTXUtilities.cpp | 13 +++-
.../KernelInfo/launch-bounds/nvptx.ll | 4 +-
llvm/test/CodeGen/NVPTX/annotations.ll | 12 +---
llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll | 16 +++--
llvm/test/CodeGen/NVPTX/maxclusterrank.ll | 8 +--
.../CodeGen/NVPTX/upgrade-nvvm-annotations.ll | 64 +++++++++++++++----
.../Dialect/NVVM/NVVMToLLVMIRTranslation.cpp | 7 +-
.../LLVMIR/external-func-dialect-attr.mlir | 4 +-
mlir/test/Target/LLVMIR/nvvmir.mlir | 21 +++---
15 files changed, 160 insertions(+), 100 deletions(-)
diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp
index b82e4ddb9f3f2b..f89d32d4e13fe9 100644
--- a/clang/lib/CodeGen/Targets/NVPTX.cpp
+++ b/clang/lib/CodeGen/Targets/NVPTX.cpp
@@ -375,11 +375,8 @@ void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F,
if (MinBlocks > 0) {
if (MinBlocksVal)
*MinBlocksVal = MinBlocks.getExtValue();
- if (F) {
- // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
- NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "minctasm",
- MinBlocks.getExtValue());
- }
+ if (F)
+ F->addFnAttr("nvvm.minctasm", llvm::utostr(MinBlocks.getExtValue()));
}
}
if (Attr->getMaxBlocks()) {
@@ -388,11 +385,9 @@ void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F,
if (MaxBlocks > 0) {
if (MaxClusterRankVal)
*MaxClusterRankVal = MaxBlocks.getExtValue();
- if (F) {
- // Create !{<func-ref>, metadata !"maxclusterrank", i32 <val>} node
- NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxclusterrank",
- MaxBlocks.getExtValue());
- }
+ if (F)
+ F->addFnAttr("nvvm.maxclusterrank",
+ llvm::utostr(MaxBlocks.getExtValue()));
}
}
}
diff --git a/clang/test/CodeGenCUDA/launch-bounds.cu b/clang/test/CodeGenCUDA/launch-bounds.cu
index 31ca9216b413e9..72f7857264f8cf 100644
--- a/clang/test/CodeGenCUDA/launch-bounds.cu
+++ b/clang/test/CodeGenCUDA/launch-bounds.cu
@@ -9,6 +9,25 @@
#define MAX_BLOCKS_PER_MP 4
#endif
+// CHECK: @Kernel1() #[[ATTR0:[0-9]+]]
+// CHECK: @{{.*}}Kernel4{{.*}}() #[[ATTR0]]
+// CHECK: @{{.*}}Kernel5{{.*}}() #[[ATTR1:[0-9]+]]
+// CHECK: @{{.*}}Kernel6{{.*}}() #[[ATTR0]]
+// CHECK: @{{.*}}Kernel8{{.*}}() #[[ATTR3:[0-9]+]]
+
+// CHECK: attributes #[[ATTR0]] = {{{.*}} "nvvm.minctasm"="2" {{.*}}}
+// CHECK: attributes #[[ATTR1]] = {{{.*}} "nvvm.minctasm"="258" {{.*}}}
+// CHECK: attributes #[[ATTR3]] = {{{.*}} "nvvm.minctasm"="12" {{.*}}}
+
+// CHECK_MAX_BLOCKS: @Kernel1_sm_90() #[[ATTR4:[0-9]+]]
+// CHECK_MAX_BLOCKS: @{{.*}}Kernel4_sm_90{{.*}} #[[ATTR4]]
+// CHECK_MAX_BLOCKS: @{{.*}}Kernel5_sm_90{{.*}} #[[ATTR5:[0-9]+]]
+// CHECK_MAX_BLOCKS: @{{.*}}Kernel8_sm_90{{.*}} #[[ATTR6:[0-9]+]]
+
+// CHECK_MAX_BLOCKS: attributes #[[ATTR4]] = {{{.*}} "nvvm.maxclusterrank"="4" "nvvm.minctasm"="2" {{.*}}}
+// CHECK_MAX_BLOCKS: attributes #[[ATTR5]] = {{{.*}} "nvvm.maxclusterrank"="260" "nvvm.minctasm"="258" {{.*}}}
+// CHECK_MAX_BLOCKS: attributes #[[ATTR6]] = {{{.*}} "nvvm.maxclusterrank"="14" "nvvm.minctasm"="12" {{.*}}}
+
// Test both max threads per block and Min cta per sm.
extern "C" {
__global__ void
@@ -19,7 +38,6 @@ Kernel1()
}
// CHECK: !{{[0-9]+}} = !{ptr @Kernel1, !"maxntidx", i32 256}
-// CHECK: !{{[0-9]+}} = !{ptr @Kernel1, !"minctasm", i32 2}
#ifdef USE_MAX_BLOCKS
// Test max threads per block and min/max cta per sm.
@@ -32,8 +50,6 @@ Kernel1_sm_90()
}
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @Kernel1_sm_90, !"maxntidx", i32 256}
-// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @Kernel1_sm_90, !"minctasm", i32 2}
-// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @Kernel1_sm_90, !"maxclusterrank", i32 4}
#endif // USE_MAX_BLOCKS
// Test only max threads per block. Min cta per sm defaults to 0, and
@@ -67,7 +83,6 @@ Kernel4()
template __global__ void Kernel4<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>();
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4{{.*}}, !"maxntidx", i32 256}
-// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4{{.*}}, !"minctasm", i32 2}
#ifdef USE_MAX_BLOCKS
template <int max_threads_per_block, int min_blocks_per_mp, int max_blocks_per_mp>
@@ -79,8 +94,6 @@ Kernel4_sm_90()
template __global__ void Kernel4_sm_90<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP, MAX_BLOCKS_PER_MP>();
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4_sm_90{{.*}}, !"maxntidx", i32 256}
-// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4_sm_90{{.*}}, !"minctasm", i32 2}
-// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4_sm_90{{.*}}, !"maxclusterrank", i32 4}
#endif //USE_MAX_BLOCKS
const int constint = 100;
@@ -94,7 +107,6 @@ Kernel5()
template __global__ void Kernel5<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>();
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5{{.*}}, !"maxntidx", i32 356}
-// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5{{.*}}, !"minctasm", i32 258}
#ifdef USE_MAX_BLOCKS
@@ -109,8 +121,6 @@ Kernel5_sm_90()
template __global__ void Kernel5_sm_90<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP, MAX_BLOCKS_PER_MP>();
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5_sm_90{{.*}}, !"maxntidx", i32 356}
-// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5_sm_90{{.*}}, !"minctasm", i32 258}
-// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5_sm_90{{.*}}, !"maxclusterrank", i32 260}
#endif //USE_MAX_BLOCKS
// Make sure we don't emit negative launch bounds values.
@@ -120,7 +130,6 @@ Kernel6()
{
}
// CHECK-NOT: !{{[0-9]+}} = !{ptr @{{.*}}Kernel6{{.*}}, !"maxntidx",
-// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel6{{.*}}, !"minctasm",
__global__ void
__launch_bounds__( MAX_THREADS_PER_BLOCK, -MIN_BLOCKS_PER_MP )
@@ -144,12 +153,9 @@ Kernel7_sm_90()
const char constchar = 12;
__global__ void __launch_bounds__(constint, constchar) Kernel8() {}
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8{{.*}}, !"maxntidx", i32 100
-// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8{{.*}}, !"minctasm", i32 12
#ifdef USE_MAX_BLOCKS
const char constchar_2 = 14;
__global__ void __launch_bounds__(constint, constchar, constchar_2) Kernel8_sm_90() {}
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8_sm_90{{.*}}, !"maxntidx", i32 100
-// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8_sm_90{{.*}}, !"minctasm", i32 12
-// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8_sm_90{{.*}}, !"maxclusterrank", i32 14
#endif // USE_MAX_BLOCKS
diff --git a/llvm/docs/NVPTXUsage.rst b/llvm/docs/NVPTXUsage.rst
index 64dd2b84a1763e..304c4fa2252a2c 100644
--- a/llvm/docs/NVPTXUsage.rst
+++ b/llvm/docs/NVPTXUsage.rst
@@ -33,17 +33,12 @@ Marking Functions as Kernels
In PTX, there are two types of functions: *device functions*, which are only
callable by device code, and *kernel functions*, which are callable by host
-code. By default, the back-end will emit device functions. Metadata is used to
-declare a function as a kernel function. This metadata is attached to the
-``nvvm.annotations`` named metadata object, and has the following format:
+code. By default, the back-end will emit device functions. The ``ptx_kernel``
+calling convention is used to declare a function as a kernel function.
-.. code-block:: text
-
- !0 = !{<function-ref>, metadata !"kernel", i32 1}
-
-The first parameter is a reference to the kernel function. The following
-example shows a kernel function calling a device function in LLVM IR. The
-function ``@my_kernel`` is callable from host code, but ``@my_fmad`` is not.
+The following example shows a kernel function calling a device function in LLVM
+IR. The function ``@my_kernel`` is callable from host code, but ``@my_fmad`` is
+not.
.. code-block:: llvm
@@ -53,18 +48,32 @@ function ``@my_kernel`` is callable from host code, but ``@my_fmad`` is not.
ret float %add
}
- define void @my_kernel(ptr %ptr) {
+ define ptx_kernel void @my_kernel(ptr %ptr) {
%val = load float, ptr %ptr
%ret = call float @my_fmad(float %val, float %val, float %val)
store float %ret, ptr %ptr
ret void
}
- !nvvm.annotations = !{!1}
- !1 = !{ptr @my_kernel, !"kernel", i32 1}
-
When compiled, the PTX kernel functions are callable by host-side code.
+.. _nvptx_fnattrs:
+
+Function Attributes
+-------------------
+
+``"nvvm.maxclusterrank"="<n>"``
+ This attribute specifies the maximum number of blocks per cluster. Must be
+ non-zero. Only supported for Hopper+.
+
+``"nvvm.minctasm"="<n>"``
+ This indicates a hint/directive to the compiler/driver, asking it to put at
+ least these many CTAs on an SM.
+
+``"nvvm.maxnreg"="<n>"``
+ This attribute indicates the maximum number of registers to be used for the
+ kernel function.
+
.. _address_spaces:
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 695b15ac31f380..fee280cf4f705f 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -6452,7 +6452,7 @@ void OpenMPIRBuilder::writeTeamsForKernel(const Triple &T, Function &Kernel,
int32_t LB, int32_t UB) {
if (T.isNVPTX())
if (UB > 0)
- updateNVPTXMetadata(Kernel, "maxclusterrank", UB, true);
+ Kernel.addFnAttr("nvvm.maxclusterrank", llvm::utostr(UB));
if (T.isAMDGPU())
Kernel.addFnAttr("amdgpu-max-num-workgroups", llvm::utostr(LB) + ",1,1");
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index e886a6012b219a..57072715366c9c 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/IR/AutoUpgrade.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/Dwarf.h"
@@ -5043,6 +5044,21 @@ bool static upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K,
Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign));
return true;
}
+ if (K == "maxclusterrank" || K == "cluster_max_blocks") {
+ const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
+ cast<Function>(GV)->addFnAttr("nvvm.maxclusterrank", llvm::utostr(CV));
+ return true;
+ }
+ if (K == "minctasm") {
+ const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
+ cast<Function>(GV)->addFnAttr("nvvm.minctasm", llvm::utostr(CV));
+ return true;
+ }
+ if (K == "maxnreg") {
+ const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
+ cast<Function>(GV)->addFnAttr("nvvm.maxnreg", llvm::utostr(CV));
+ return true;
+ }
return false;
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp
index c03ef8d33220c1..ae5922cba4ce3b 100644
--- a/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp
@@ -70,18 +70,13 @@ static void addKernelMetadata(Module &M, Function *F) {
llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
- llvm::Metadata *BlockMDVals[] = {
- llvm::ConstantAsMetadata::get(F),
- llvm::MDString::get(Ctx, "maxclusterrank"),
- llvm::ConstantAsMetadata::get(
- llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
+ F->addFnAttr("nvvm.maxclusterrank", "1");
+ F->setCallingConv(CallingConv::PTX_Kernel);
// Append metadata to nvvm.annotations.
- F->setCallingConv(CallingConv::PTX_Kernel);
MD->addOperand(llvm::MDNode::get(Ctx, ThreadXMDVals));
MD->addOperand(llvm::MDNode::get(Ctx, ThreadYMDVals));
MD->addOperand(llvm::MDNode::get(Ctx, ThreadZMDVals));
- MD->addOperand(llvm::MDNode::get(Ctx, BlockMDVals));
}
static Function *createInitOrFiniKernelFunction(Module &M, bool IsCtor) {
diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
index a41943880807c5..187b8905750129 100644
--- a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -179,6 +179,13 @@ static bool argHasNVVMAnnotation(const Value &Val,
return false;
}
+static std::optional<unsigned> getFnAttrParsedIntOrNull(const Function &F,
+ StringRef Attr) {
+ if (F.hasFnAttribute(Attr))
+ return F.getFnAttributeAsParsedInteger(Attr);
+ return std::nullopt;
+}
+
bool isParamGridConstant(const Value &V) {
if (const Argument *Arg = dyn_cast<Argument>(&V)) {
// "grid_constant" counts argument indices starting from 1
@@ -277,7 +284,7 @@ std::optional<unsigned> getClusterDimz(const Function &F) {
}
std::optional<unsigned> getMaxClusterRank(const Function &F) {
- return findOneNVVMAnnotation(&F, "maxclusterrank");
+ return getFnAttrParsedIntOrNull(F, "nvvm.maxclusterrank");
}
std::optional<unsigned> getReqNTIDx(const Function &F) {
@@ -303,11 +310,11 @@ std::optional<unsigned> getReqNTID(const Function &F) {
}
std::optional<unsigned> getMinCTASm(const Function &F) {
- return findOneNVVMAnnotation(&F, "minctasm");
+ return getFnAttrParsedIntOrNull(F, "nvvm.minctasm");
}
std::optional<unsigned> getMaxNReg(const Function &F) {
- return findOneNVVMAnnotation(&F, "maxnreg");
+ return getFnAttrParsedIntOrNull(F, "nvvm.maxnreg");
}
MaybeAlign getAlign(const Function &F, unsigned Index) {
diff --git a/llvm/test/Analysis/KernelInfo/launch-bounds/nvptx.ll b/llvm/test/Analysis/KernelInfo/launch-bounds/nvptx.ll
index 7a055c7152ec85..a0c06083c270bc 100644
--- a/llvm/test/Analysis/KernelInfo/launch-bounds/nvptx.ll
+++ b/llvm/test/Analysis/KernelInfo/launch-bounds/nvptx.ll
@@ -23,11 +23,12 @@ entry:
attributes #0 = {
"omp_target_num_teams"="100"
"omp_target_thread_limit"="101"
+ "nvvm.maxclusterrank"="200"
}
!llvm.module.flags = !{!0}
!llvm.dbg.cu = !{!1}
-!nvvm.annotations = !{!6, !7, !8, !9, !10}
+!nvvm.annotations = !{!7, !8, !9, !10}
!0 = !{i32 2, !"Debug Info Version", i32 3}
!1 = distinct !DICompileUnit(language: DW_LANG_C11, file: !2, producer: "clang version 19.0.0git", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
@@ -35,7 +36,6 @@ attributes #0 = {
!3 = !{}
!4 = !DISubroutineType(types: !3)
!5 = distinct !DISubprogram(name: "test", scope: !2, file: !2, line: 10, type: !4, scopeLine: 10, flags: DIFlagArtificial | DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !1, retainedNodes: !3)
-!6 = !{ptr @test, !"maxclusterrank", i32 200}
!7 = !{ptr @test, !"maxntidx", i32 210}
!8 = !{ptr @test, !"maxntidy", i32 211}
!9 = !{ptr @test, !"maxntidz", i32 212}
diff --git a/llvm/test/CodeGen/NVPTX/annotations.ll b/llvm/test/CodeGen/NVPTX/annotations.ll
index 3bd534bb0cf5d2..1f888d7fb21f1e 100644
--- a/llvm/test/CodeGen/NVPTX/annotations.ll
+++ b/llvm/test/CodeGen/NVPTX/annotations.ll
@@ -23,20 +23,20 @@ define void @kernel_func_reqntid(ptr %a) {
}
; CHECK: .entry kernel_func_minctasm
-define void @kernel_func_minctasm(ptr %a) {
+define ptx_kernel void @kernel_func_minctasm(ptr %a) "nvvm.minctasm"="42" {
; CHECK: .minnctapersm 42
; CHECK: ret
ret void
}
; CHECK-LABEL: .entry kernel_func_maxnreg
-define void @kernel_func_maxnreg() {
+define ptx_kernel void @kernel_func_maxnreg() "nvvm.maxnreg"="1234" {
; CHECK: .maxnreg 1234
; CHECK: ret
ret void
}
-!nvvm.annotations = !{!1, !2, !3, !4, !5, !6, !7, !8, !9, !10}
+!nvvm.annotations = !{!1, !2, !3, !4, !9, !10}
!1 = !{ptr @kernel_func_maxntid, !"kernel", i32 1}
!2 = !{ptr @kernel_func_maxntid, !"maxntidx", i32 10, !"maxntidy", i32 20, !"maxntidz", i32 30}
@@ -44,11 +44,5 @@ define void @kernel_func_maxnreg() {
!3 = !{ptr @kernel_func_reqntid, !"kernel", i32 1}
!4 = !{ptr @kernel_func_reqntid, !"reqntidx", i32 11, !"reqntidy", i32 22, !"reqntidz", i32 33}
-!5 = !{ptr @kernel_func_minctasm, !"kernel", i32 1}
-!6 = !{ptr @kernel_func_minctasm, !"minctasm", i32 42}
-
-!7 = !{ptr @kernel_func_maxnreg, !"kernel", i32 1}
-!8 = !{ptr @kernel_func_maxnreg, !"maxnreg", i32 1234}
-
!9 = !{ptr addrspace(1) @texture, !"texture", i32 1}
!10 = !{ptr addrspace(1) @surface, !"surface", i32 1}
diff --git a/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll b/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll
index 4ee1ca3ad4b1f0..71daa8ccef2f05 100644
--- a/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll
@@ -43,7 +43,8 @@ define internal void @bar() {
ret void
}
-; CHECK-LABEL: define weak_odr ptx_kernel void @"nvptx$device$init"() {
+; CHECK-LABEL: define weak_odr ptx_kernel void @"nvptx$device$init"
+; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[BEGIN:%.*]] = load ptr addrspace(1), ptr addrspace(1) @__init_array_start, align 8
; CHECK-NEXT: [[STOP:%.*]] = load ptr addrspace(1), ptr addrspace(1) @__init_array_end, align 8
@@ -60,7 +61,8 @@ define internal void @bar() {
; CHECK-NEXT: ret void
;
;
-; CHECK-LABEL: define weak_odr ptx_kernel void @"nvptx$device$fini"() {
+; CHECK-LABEL: define weak_odr ptx_kernel void @"nvptx$device$fini"
+; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[BEGIN:%.*]] = load ptr addrspace(1), ptr addrspace(1) @__fini_array_start, align 8
; CHECK-NEXT: [[STOP:%.*]] = load ptr addrspace(1), ptr addrspace(1) @__fini_array_end, align 8
@@ -82,11 +84,11 @@ define internal void @bar() {
; CHECK: while.end:
; CHECK-NEXT: ret void
+; CHECK: attributes #[[ATTR0]] = { "nvvm.maxclusterrank"="1" }
+
; CHECK: [[META1:![0-9]+]] = !{ptr @"nvptx$device$init", !"maxntidx", i32 1}
; CHECK: [[META2:![0-9]+]] = !{ptr @"nvptx$device$init", !"maxntidy", i32 1}
; CHECK: [[META3:![0-9]+]] = !{ptr @"nvptx$device$init", !"maxntidz", i32 1}
-; CHECK: [[META4:![0-9]+]] = !{ptr @"nvptx$device$init", !"maxclusterrank", i32 1}
-; CHECK: [[META6:![0-9]+]] = !{ptr @"nvptx$device$fini", !"maxntidx", i32 1}
-; CHECK: [[META7:![0-9]+]] = !{ptr @"nvptx$device$fini", !"maxntidy", i32 1}
-; CHECK: [[META8:![0-9]+]] = !{ptr @"nvptx$device$fini", !"maxntidz", i32 1}
-; CHECK: [[META9:![0-9]+]] = !{ptr @"nvptx$device$fini", !"maxclusterrank", i32 1}
+; CHECK: [[META4:![0-9]+]] = !{ptr @"nvptx$device$fini", !"maxntidx", i32 1}
+; CHECK: [[META5:![0-9]+]] = !{ptr @"nvptx$device$fini", !"maxntidy", i32 1}
+; CHECK: [[META6:![0-9]+]] = !{ptr @"nvptx$device$fini", !"maxntidz", i32 1}
diff --git a/llvm/test/CodeGen/NVPTX/maxclusterrank.ll b/llvm/test/CodeGen/NVPTX/maxclusterrank.ll
index c445c34c1842a5..51483296dd34fe 100644
--- a/llvm/test/CodeGen/NVPTX/maxclusterrank.ll
+++ b/llvm/test/CodeGen/NVPTX/maxclusterrank.ll
@@ -10,16 +10,14 @@ target triple = "nvptx64-unknown-unknown"
; CHECK_SM_80-NOT: .maxclusterrank 8
; Make sure that for SM version prior to 90 `.maxclusterrank` directive is
-; sielently ignored.
-define dso_local ptx_kernel void @_Z18TestMaxClusterRankv() {
+; silently ignored.
+define dso_local ptx_kernel void @_Z18TestMaxClusterRankv() "nvvm.minctasm"="2" "nvvm.maxclusterrank"="8" {
entry:
%a = alloca i32, align 4
store volatile i32 1, ptr %a, align 4
ret void
}
-!nvvm.annotations = !{!1, !2, !3}
+!nvvm.annotations = !{!1}
!1 = !{ptr @_Z18TestMaxClusterRankv, !"maxntidx", i32 128}
-!2 = !{ptr @_Z18TestMaxClusterRankv, !"minctasm", i32 2}
-!3 = !{ptr @_Z18TestMaxClusterRankv, !"maxclusterrank", i32 8}
diff --git a/llvm/test/CodeGen/NVPTX/upgrade-nvvm-annotations.ll b/llvm/test/CodeGen/NVPTX/upgrade-nvvm-annotations.ll
index a9f370a12a945a..3a1f59454493cb 100644
--- a/llvm/test/CodeGen/NVPTX/upgrade-nvvm-annotations.ll
+++ b/llvm/test/CodeGen/NVPTX/upgrade-nvvm-annotations.ll
@@ -1,28 +1,68 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
-; RUN: opt < %s -mtriple=nvptx64-unknown-unknown -O0 -S | FileCheck %s
+; RUN: opt < %s -passes=verify -S | FileCheck %s
-define i32 @foo(i32 %a, i32 %b) {
-; CHECK-LABEL: define i32 @foo(
+define i32 @test_align(i32 %a, i32 %b) {
+; CHECK-LABEL: define i32 @test_align(
; CHECK-SAME: i32 alignstack(8) [[A:%.*]], i32 alignstack(16) [[B:%.*]]) {
; CHECK-NEXT: ret i32 0
;
ret i32 0
}
-define i32 @bar(i32 %a, i32 %b) {
-; CHECK-LABEL: define ptx_kernel i32 @bar(
-; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
-; CHECK-NEXT: ret i32 0
+define void @test_kernel() {
+; CHECK-LABEL: define ptx_kernel void @test_kernel() {
+; CHECK-NEXT: ret void
;
- ret i32 0
+ ret void
+}
+
+define void @test_maxclusterrank() {
+; CHECK-LABEL: define void @test_maxclusterrank(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: ret void
+;
+ ret void
}
-!nvvm.annotations = !{!0, !1, !2}
+define void @test_cluster_max_blocks() {
+; CHECK-LABEL: define void @test_cluster_max_blocks(
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: ret void
+;
+ ret void
+}
-!0 = !{ptr @foo, !"align", i32 u0x00000008, !"align", i32 u0x00010008, !"align", i32 u0x00020010}
+define void @test_minctasm() {
+; CHECK-LABEL: define void @test_minctasm(
+; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT: ret void
+;
+ ret void
+}
+
+define void @test_maxnreg() {
+; CHECK-LABEL: define void @test_maxnreg(
+; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT: ret void
+;
+ ret void
+}
+
+!nvvm.annotations = !{!0, !1, !2, !3, !4, !5, !6}
+
+!0 = !{ptr @test_align, !"align", i32 u0x00000008, !"align", i32 u0x00010008, !"align", i32 u0x00020010}
!1 = !{null, !"align", i32 u0x00000008, !"align", i32 u0x00010008, !"align", i32 u0x00020008}
-!2 = !{ptr @bar, !"kernel", i32 1}
+!2 = !{ptr @test_kernel, !"kernel", i32 1}
+!3 = !{ptr @test_maxclusterrank, !"maxclusterrank", i32 2}
+!4 = !{ptr @test_cluster_max_blocks, !"cluster_max_blocks", i32 3}
+!5 = !{ptr @test_minctasm, !"minctasm", i32 4}
+!6 = !{ptr @test_maxnreg, !"maxnreg", i32 5}
;.
-; CHECK: [[META0:![0-9]+]] = !{ptr @foo, !"align", i32 8}
+; CHECK: attributes #[[ATTR0]] = { "nvvm.maxclusterrank"="2" }
+; CHECK: attributes #[[ATTR1]] = { "nvvm.maxclusterrank"="3" }
+; CHECK: attributes #[[ATTR2]] = { "nvvm.minctasm"="4" }
+; CHECK: attributes #[[ATTR3]] = { "nvvm.maxnreg"="5" }
+;.
+; CHECK: [[META0:![0-9]+]] = !{ptr @test_align, !"align", i32 8}
;.
diff --git a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
index 659ab1227f1137..8b137357746630 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
@@ -17,6 +17,7 @@
#include "mlir/IR/Operation.h"
#include "mlir/Target/LLVMIR/ModuleTranslation.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
@@ -227,14 +228,14 @@ class NVVMDialectLLVMIRTranslationInterface
} else if (attribute.getName() ==
NVVM::NVVMDialect::getClusterMaxBlocksAttrName()) {
auto value = dyn_cast<IntegerAttr>(attribute.getValue());
- generateMetadata(value.getInt(), "cluster_max_blocks");
+ llvmFunc->addFnAttr("nvvm.maxclusterrank", llvm::utostr(value.getInt()));
} else if (attribute.getName() ==
NVVM::NVVMDialect::getMinctasmAttrName()) {
auto value = dyn_cast<IntegerAttr>(attribute.getValue());
- generateMetadata(value.getInt(), "minctasm");
+ llvmFunc->addFnAttr("nvvm.minctasm", llvm::utostr(value.getInt()));
} else if (attribute.getName() == NVVM::NVVMDialect::getMaxnregAttrName()) {
auto value = dyn_cast<IntegerAttr>(attribute.getValue());
- generateMetadata(value.getInt(), "maxnreg");
+ llvmFunc->addFnAttr("nvvm.maxnreg", llvm::utostr(value.getInt()));
} else if (attribute.getName() ==
NVVM::NVVMDialect::getKernelFuncAttrName()) {
llvmFunc->setCallingConv(llvm::CallingConv::PTX_Kernel);
diff --git a/mlir/test/Target/LLVMIR/external-func-dialect-attr.mlir b/mlir/test/Target/LLVMIR/external-func-dialect-attr.mlir
index 6605f10f128e61..459859f5be47b2 100644
--- a/mlir/test/Target/LLVMIR/external-func-dialect-attr.mlir
+++ b/mlir/test/Target/LLVMIR/external-func-dialect-attr.mlir
@@ -6,6 +6,6 @@
module {
llvm.func external @f() attributes { nvvm.minctasm = 10 : i32 }
- // CHECK: !nvvm.annotations = !{![[NVVM:[0-9]+]]}
- // CHECK: ![[NVVM]] = !{ptr @f, !"minctasm", i32 10}
+ // CHECK: declare void @f() #[[ATTRS:[0-9]+]]
+ // CHECK: attributes #[[ATTRS]] = { "nvvm.minctasm"="10" }
}
diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
index 970cac707b058f..5ab593452ab669 100644
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -623,27 +623,25 @@ llvm.func @kernel_func() attributes {nvvm.kernel, nvvm.cluster_max_blocks = 8} {
llvm.return
}
-// CHECK: define ptx_kernel void @kernel_func
-// CHECK: !nvvm.annotations =
-// CHECK: {ptr @kernel_func, !"cluster_max_blocks", i32 8}
+// CHECK: define ptx_kernel void @kernel_func() #[[ATTR0:[0-9]+]]
+// CHECK: attributes #[[ATTR0]] = { "nvvm.maxclusterrank"="8" }
+
// -----
llvm.func @kernel_func() attributes {nvvm.kernel, nvvm.minctasm = 16} {
llvm.return
}
-// CHECK: define ptx_kernel void @kernel_func
-// CHECK: !nvvm.annotations =
-// CHECK: {ptr @kernel_func, !"minctasm", i32 16}
+// CHECK: define ptx_kernel void @kernel_func() #[[ATTR0:[0-9]+]]
+// CHECK: attributes #[[ATTR0]] = { "nvvm.minctasm"="16" }
// -----
llvm.func @kernel_func() attributes {nvvm.kernel, nvvm.maxnreg = 16} {
llvm.return
}
-// CHECK: define ptx_kernel void @kernel_func
-// CHECK: !nvvm.annotations =
-// CHECK: {ptr @kernel_func, !"maxnreg", i32 16}
+// CHECK: define ptx_kernel void @kernel_func() #[[ATTR0:[0-9]+]]
+// CHECK: attributes #[[ATTR0]] = { "nvvm.maxnreg"="16" }
// -----
llvm.func @kernel_func() attributes {nvvm.kernel, nvvm.maxntid = array<i32: 1, 23, 32>,
@@ -651,13 +649,12 @@ llvm.func @kernel_func() attributes {nvvm.kernel, nvvm.maxntid = array<i32: 1, 2
llvm.return
}
-// CHECK: define ptx_kernel void @kernel_func
+// CHECK: define ptx_kernel void @kernel_func() #[[ATTR0:[0-9]+]]
+// CHECK: attributes #[[ATTR0]] = { "nvvm.maxnreg"="32" "nvvm.minctasm"="16" }
// CHECK: !nvvm.annotations =
-// CHECK: {ptr @kernel_func, !"maxnreg", i32 32}
// CHECK: {ptr @kernel_func, !"maxntidx", i32 1}
// CHECK: {ptr @kernel_func, !"maxntidy", i32 23}
// CHECK: {ptr @kernel_func, !"maxntidz", i32 32}
-// CHECK: {ptr @kernel_func, !"minctasm", i32 16}
// -----
// CHECK: define ptx_kernel void @kernel_func
>From 717a01d2be7a45719f59f7aa065b4416a58458c9 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Wed, 5 Feb 2025 23:46:51 +0000
Subject: [PATCH 2/2] address comments
---
llvm/lib/Target/NVPTX/NVPTXUtilities.cpp | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
index 187b8905750129..430502d85dfb46 100644
--- a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -179,11 +179,11 @@ static bool argHasNVVMAnnotation(const Value &Val,
return false;
}
-static std::optional<unsigned> getFnAttrParsedIntOrNull(const Function &F,
- StringRef Attr) {
- if (F.hasFnAttribute(Attr))
- return F.getFnAttributeAsParsedInteger(Attr);
- return std::nullopt;
+static std::optional<unsigned> getFnAttrParsedInt(const Function &F,
+ StringRef Attr) {
+ return F.hasFnAttribute(Attr)
+ ? std::optional(F.getFnAttributeAsParsedInteger(Attr))
+ : std::nullopt;
}
bool isParamGridConstant(const Value &V) {
@@ -284,7 +284,7 @@ std::optional<unsigned> getClusterDimz(const Function &F) {
}
std::optional<unsigned> getMaxClusterRank(const Function &F) {
- return getFnAttrParsedIntOrNull(F, "nvvm.maxclusterrank");
+ return getFnAttrParsedInt(F, "nvvm.maxclusterrank");
}
std::optional<unsigned> getReqNTIDx(const Function &F) {
@@ -310,11 +310,11 @@ std::optional<unsigned> getReqNTID(const Function &F) {
}
std::optional<unsigned> getMinCTASm(const Function &F) {
- return getFnAttrParsedIntOrNull(F, "nvvm.minctasm");
+ return getFnAttrParsedInt(F, "nvvm.minctasm");
}
std::optional<unsigned> getMaxNReg(const Function &F) {
- return getFnAttrParsedIntOrNull(F, "nvvm.maxnreg");
+ return getFnAttrParsedInt(F, "nvvm.maxnreg");
}
MaybeAlign getAlign(const Function &F, unsigned Index) {
More information about the Mlir-commits
mailing list