[llvm] [NVPTX] Attach Range attr to setmaxnreg and fence intrinsics (PR #144120)

Fri Jun 13 10:01:29 PDT 2025

https://github.com/durga4github created https://github.com/llvm/llvm-project/pull/144120

This patch attaches the range attribute to the immediate
arguments of the setmaxnreg and
fence.proxy.tensormap_generic.acquire.* intrinsics. Since
range checks on such arguments are now handled generically
in the Verifier, this patch removes the per-intrinsic
range-check error handling from the Verifier.

This patch also adds Verifier tests covering the out-of-range
cases for these intrinsics.

>From 00210bca5cc7c408db77270265e86c875e376b3d Mon Sep 17 00:00:00 2001
From: Durgadoss R <durgadossr at nvidia.com>
Date: Fri, 13 Jun 2025 18:23:44 +0530
Subject: [PATCH] [NVPTX] Attach Range attr to setmaxnreg and fence intrinsics

This patch attaches the range attribute to the immediate
arguments of the setmaxnreg and
fence.proxy.tensormap_generic.acquire.* intrinsics.
Since range checks on such arguments are now handled in a
generic manner in the Verifier, this patch removes the
per-intrinsic range-check error handling from the Verifier.

This patch also adds Verifier tests covering the out-of-range
cases for these intrinsics.

Signed-off-by: Durgadoss R <durgadossr at nvidia.com>
---
 llvm/include/llvm/IR/IntrinsicsNVVM.td          |  8 ++++++--
 llvm/lib/IR/Verifier.cpp                        | 10 ----------
 .../Verifier/NVPTX/fence-proxy.tensormap.ll     | 17 +++++++++++++++++
 llvm/test/Verifier/NVPTX/setmaxnreg.ll          |  4 +++-
 4 files changed, 26 insertions(+), 13 deletions(-)
 create mode 100644 llvm/test/Verifier/NVPTX/fence-proxy.tensormap.ll

diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 4efdff71c0167..410a0dea2bf57 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -1341,9 +1341,11 @@ foreach scope = ["cta", "cluster", "gpu", "sys"] in {
         Intrinsic<[], [], [IntrNoCallback],
         "llvm.nvvm.fence.proxy.tensormap_generic.release." # scope>;
 
+  // The imm-arg 'size' can only be 128.
   def int_nvvm_fence_proxy_tensormap_generic_acquire_ # scope :
         Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty],
-                  [IntrNoCallback, IntrArgMemOnly, ImmArg<ArgIndex<1>>],
+                  [IntrNoCallback, IntrArgMemOnly, ImmArg<ArgIndex<1>>,
+                   Range<ArgIndex<1>, 128, 129>],
                   "llvm.nvvm.fence.proxy.tensormap_generic.acquire." # scope>;
 }
 
@@ -1989,10 +1991,12 @@ def int_nvvm_is_explicit_cluster
               "llvm.nvvm.is_explicit_cluster">;
 
 // Setmaxnreg inc/dec intrinsics
+// The imm-arg should be in the range: 24 <= val <= 256
 foreach op = ["dec", "inc"] in
   def int_nvvm_setmaxnreg_ # op # _sync_aligned_u32
     : DefaultAttrsIntrinsic<[], [llvm_i32_ty],
-              [IntrConvergent, IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>]>;
+              [IntrConvergent, IntrNoMem, IntrHasSideEffects,
+               ImmArg<ArgIndex<0>>, Range<ArgIndex<0>, 24, 257>]>;
 
 // Exit
 def int_nvvm_exit : NVVMBuiltin,
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 1f1041b259736..f0a4d7b6a4c1e 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -6557,8 +6557,6 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
     unsigned RegCount = cast<ConstantInt>(V)->getZExtValue();
     Check(RegCount % 8 == 0,
           "reg_count argument to nvvm.setmaxnreg must be in multiples of 8");
-    Check((RegCount >= 24 && RegCount <= 256),
-          "reg_count argument to nvvm.setmaxnreg must be within [24, 256]");
     break;
   }
   case Intrinsic::experimental_convergence_entry:
@@ -6605,14 +6603,6 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
           "llvm.threadlocal.address operand isThreadLocal() must be true");
     break;
   }
-  case Intrinsic::nvvm_fence_proxy_tensormap_generic_acquire_cta:
-  case Intrinsic::nvvm_fence_proxy_tensormap_generic_acquire_cluster:
-  case Intrinsic::nvvm_fence_proxy_tensormap_generic_acquire_gpu:
-  case Intrinsic::nvvm_fence_proxy_tensormap_generic_acquire_sys: {
-    unsigned size = cast<ConstantInt>(Call.getArgOperand(1))->getZExtValue();
-    Check(size == 128, " The only supported value for size operand is 128");
-    break;
-  }
   };
 
   // Verify that there aren't any unmediated control transfers between funclets.
diff --git a/llvm/test/Verifier/NVPTX/fence-proxy.tensormap.ll b/llvm/test/Verifier/NVPTX/fence-proxy.tensormap.ll
new file mode 100644
index 0000000000000..4fa7a7ae71001
--- /dev/null
+++ b/llvm/test/Verifier/NVPTX/fence-proxy.tensormap.ll
@@ -0,0 +1,17 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+define void @test_fence_proxy_tensormap_generic_acquire(ptr addrspace(0) %addr) {
+  ; CHECK: immarg value 127 out of range [128, 129)
+  call void @llvm.nvvm.fence.proxy.tensormap_generic.acquire.cta(ptr addrspace(0) %addr, i32 127);
+
+  ; CHECK: immarg value 129 out of range [128, 129)
+  call void @llvm.nvvm.fence.proxy.tensormap_generic.acquire.cluster(ptr addrspace(0) %addr, i32 129);
+
+  ; CHECK: immarg value 127 out of range [128, 129)
+  call void @llvm.nvvm.fence.proxy.tensormap_generic.acquire.gpu(ptr addrspace(0) %addr, i32 127);
+
+  ; CHECK: immarg value 129 out of range [128, 129)
+  call void @llvm.nvvm.fence.proxy.tensormap_generic.acquire.sys(ptr addrspace(0) %addr, i32 129);
+
+  ret void
+}
diff --git a/llvm/test/Verifier/NVPTX/setmaxnreg.ll b/llvm/test/Verifier/NVPTX/setmaxnreg.ll
index 8999e4ffa6679..1afebeab4742c 100644
--- a/llvm/test/Verifier/NVPTX/setmaxnreg.ll
+++ b/llvm/test/Verifier/NVPTX/setmaxnreg.ll
@@ -7,8 +7,10 @@ define void @test_set_maxn_reg() {
   ; CHECK: reg_count argument to nvvm.setmaxnreg must be in multiples of 8
   call void @llvm.nvvm.setmaxnreg.inc.sync.aligned.u32(i32 95)
 
-  ; CHECK: reg_count argument to nvvm.setmaxnreg must be within [24, 256]
+  ; CHECK: immarg value 16 out of range [24, 257)
   call void @llvm.nvvm.setmaxnreg.dec.sync.aligned.u32(i32 16)
 
+  ; CHECK: immarg value 264 out of range [24, 257)
+  call void @llvm.nvvm.setmaxnreg.dec.sync.aligned.u32(i32 264)
   ret void
 }