[Mlir-commits] [mlir] [MLIR][NVVM] Update SM version requirements of Ops (PR #192257)

Fri Apr 17 04:26:46 PDT 2026

https://github.com/Wolfram70 updated https://github.com/llvm/llvm-project/pull/192257

>From 54cadd73da00839c11f93937752a02bd705ce58f Mon Sep 17 00:00:00 2001
From: Srinivasa Ravi <srinivasar at nvidia.com>
Date: Wed, 15 Apr 2026 08:29:12 +0000
Subject: [PATCH 1/3] [MLIR][NVVM] Update SM version requirements of Ops

This change updates the SM version requirements of Ops with the
`NVVMRequiresSM` trait to include family-specific SM versions wherever
applicable.
---
 mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 48 +++++++++++++--------
 1 file changed, 29 insertions(+), 19 deletions(-)

diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 9b2a8985a1a44..ec3820b174995 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -5075,7 +5075,8 @@ def Tcgen05WaitKindAttr :
   let assemblyFormat = "`<` $value `>`";
 }
 
-def NVVM_Tcgen05AllocOp : NVVM_Op<"tcgen05.alloc", [NVVMRequiresSMa<[100, 101]>]> {
+def NVVM_Tcgen05AllocOp : NVVM_Op<"tcgen05.alloc",
+    [NVVMRequiresSMaOrSMf<[100, 101, 110], [100, 101, 110]>]> {
   let summary = "Tcgen05 alloc operation";
   let description = [{
     The `tcgen05.alloc` Op allocates tensor core memory for
@@ -5105,7 +5106,8 @@ def NVVM_Tcgen05AllocOp : NVVM_Op<"tcgen05.alloc", [NVVMRequiresSMa<[100, 101]>]
   }];
 }
 
-def NVVM_Tcgen05DeallocOp : NVVM_Op<"tcgen05.dealloc", [NVVMRequiresSMa<[100, 101]>]> {
+def NVVM_Tcgen05DeallocOp : NVVM_Op<"tcgen05.dealloc", 
+    [NVVMRequiresSMaOrSMf<[100, 101, 110], [100, 101, 110]>]> {
   let summary = "Tcgen05 dealloc operation";
   let description = [{
     The `tcgen05.dealloc` Op de-allocates the tensor core memory
@@ -5133,7 +5135,8 @@ def NVVM_Tcgen05DeallocOp : NVVM_Op<"tcgen05.dealloc", [NVVMRequiresSMa<[100, 10
   }];
 }
 
-def NVVM_Tcgen05RelinquishAllocPermitOp : NVVM_Op<"tcgen05.relinquish_alloc_permit", [NVVMRequiresSMa<[100, 101]>]> {
+def NVVM_Tcgen05RelinquishAllocPermitOp : NVVM_Op<"tcgen05.relinquish_alloc_permit",
+    [NVVMRequiresSMaOrSMf<[100, 101, 110], [100, 101, 110]>]> {
   let summary = "Tcgen05 Op to relinquish the right to allocate";
   let description = [{
     The `tcgen05.relinquish_alloc_permit` Op specifies that the CTA
@@ -5156,7 +5159,8 @@ def NVVM_Tcgen05RelinquishAllocPermitOp : NVVM_Op<"tcgen05.relinquish_alloc_perm
   }];
 }
 
-def NVVM_Tcgen05FenceOp : NVVM_Op<"tcgen05.fence", [NVVMRequiresSMa<[100, 101]>]> {
+def NVVM_Tcgen05FenceOp : NVVM_Op<"tcgen05.fence", 
+    [NVVMRequiresSMaOrSMf<[100, 101, 110], [100, 101, 110]>]> {
   let summary = "Tcgen05 fence operations";
   let description = [{
     The `tcgen05.fence<before>` orders all prior async tcgen05 operations
@@ -5178,7 +5182,8 @@ def NVVM_Tcgen05FenceOp : NVVM_Op<"tcgen05.fence", [NVVMRequiresSMa<[100, 101]>]
   }];
 }
 
-def NVVM_Tcgen05WaitOp : NVVM_Op<"tcgen05.wait", [NVVMRequiresSMa<[100, 101]>]> {
+def NVVM_Tcgen05WaitOp : NVVM_Op<"tcgen05.wait",
+    [NVVMRequiresSMaOrSMf<[100, 101, 110], [100, 101, 110]>]> {
   let summary = "Tcgen05 wait operations";
   let description = [{
     The `tcgen05.wait<load>` causes the executing thread to block until
@@ -5200,7 +5205,8 @@ def NVVM_Tcgen05WaitOp : NVVM_Op<"tcgen05.wait", [NVVMRequiresSMa<[100, 101]>]>
   }];
 }
 
-def NVVM_Tcgen05CommitOp : NVVM_Op<"tcgen05.commit", [NVVMRequiresSMa<[100, 101]>]> {
+def NVVM_Tcgen05CommitOp : NVVM_Op<"tcgen05.commit",
+    [NVVMRequiresSMaOrSMf<[100, 101, 110], [100, 101, 110]>]> {
   let summary = "Tcgen05 commit operations";
   let description = [{
     The `tcgen05.commit` makes the *mbarrier object*, specified by
@@ -5238,7 +5244,8 @@ def NVVM_Tcgen05CommitOp : NVVM_Op<"tcgen05.commit", [NVVMRequiresSMa<[100, 101]
   }];
 }
 
-def NVVM_Tcgen05ShiftOp : NVVM_Op<"tcgen05.shift", [NVVMRequiresSMa<[100, 101, 103]>]> {
+def NVVM_Tcgen05ShiftOp : NVVM_Op<"tcgen05.shift",
+    [NVVMRequiresSMa<[100, 101, 103, 110]>]> {
   let summary = "Tcgen05 shift operation";
   let description = [{
     The `tcgen05.shift` is an asynchronous instruction which initiates
@@ -5304,7 +5311,8 @@ def Tcgen05CpSrcFormatAttr : EnumAttr<NVVM_Dialect, Tcgen05CpSrcFormat, "tcgen05
   let assemblyFormat = "`<` $value `>`";
 }
 
-def NVVM_Tcgen05CpOp : NVVM_Op<"tcgen05.cp", [NVVMRequiresSMa<[100, 101]>]> {
+def NVVM_Tcgen05CpOp : NVVM_Op<"tcgen05.cp",
+    [NVVMRequiresSMaOrSMf<[100, 101, 110], [100, 101, 110]>]> {
   let summary = "Tcgen05 copy operation";
   let description = [{
     Instruction tcgen05.cp initiates an asynchronous copy operation from
@@ -5440,7 +5448,8 @@ def Tcgen05LdStShapeAttr: EnumAttr<NVVM_Dialect, Tcgen05LdStShape, "tcgen05_ldst
 // NVVM tcgen05.ld Op
 //===----------------------------------------------------------------------===//
 
-def NVVM_Tcgen05LdOp : NVVM_Op<"tcgen05.ld", [NVVMRequiresSMa<[100, 101]>]> {
+def NVVM_Tcgen05LdOp : NVVM_Op<"tcgen05.ld",
+    [NVVMRequiresSMaOrSMf<[100, 101, 110], [100, 101, 110]>]> {
   let summary = "tensor memory load instructions";
   let arguments = (ins
     // Attributes
@@ -5533,7 +5542,7 @@ def NVVM_Tcgen05LdOp : NVVM_Op<"tcgen05.ld", [NVVMRequiresSMa<[100, 101]>]> {
 //===----------------------------------------------------------------------===//
 
 def NVVM_Tcgen05LdRedOp : NVVM_Op<"tcgen05.ld.red",
-                          [NVVMRequiresSMa<[101, 110]>]> {
+                          [NVVMRequiresSMaOrSMf<[101, 110], [101, 103, 110]>]> {
   let summary = "Tcgen05 tensor memory load and reduce instructions";
   let arguments = (ins
     Tcgen05LdStShapeAttr:$shape,
@@ -5622,7 +5631,8 @@ def NVVM_Tcgen05LdRedOp : NVVM_Op<"tcgen05.ld.red",
 // NVVM tcgen05.st Op
 //===----------------------------------------------------------------------===//
 
-def NVVM_Tcgen05StOp : NVVM_Op<"tcgen05.st", [NVVMRequiresSMa<[100, 101]>]> {
+def NVVM_Tcgen05StOp : NVVM_Op<"tcgen05.st",
+    [NVVMRequiresSMaOrSMf<[100, 101, 110], [100, 101, 110]>]> {
   let summary = "tensor memory store instructions";
   let arguments = (ins
     // Attributes
@@ -6001,8 +6011,8 @@ defvar Tcgen05MMABlockScaleKindAttr =
     [EnumAttrIsOneOf<Tcgen05MMAKindAttr, Tcgen05MMABlockScaleKindList>]>;
 
 def NVVM_Tcgen05MMAOp : NVVM_Op<"tcgen05.mma",
-                          [AttrSizedOperandSegments,
-                           NVVMRequiresSMa<[100, 110]>]> {
+    [AttrSizedOperandSegments,
+     NVVMRequiresSMaOrSMf<[100, 101, 110], [100, 101, 110]>]> {
   let summary = "Performs MMA operation on 5th-gen tensor cores";
 
   let description = [{
@@ -6085,8 +6095,8 @@ def NVVM_Tcgen05MMAOp : NVVM_Op<"tcgen05.mma",
 }
 
 def NVVM_Tcgen05MMASparseOp : NVVM_Op<"tcgen05.mma.sp",
-                                      [AttrSizedOperandSegments,
-                                       NVVMRequiresSMa<[100, 110]>]> {
+    [AttrSizedOperandSegments,
+     NVVMRequiresSMaOrSMf<[100, 101, 110], [100, 101, 110]>]> {
   let summary = "Performs MMA operation with sparse A matrix on 5th-gen tensor cores";
 
   let description = [{
@@ -6167,7 +6177,7 @@ def Tcgen05MMABlockScaleAttr : EnumAttr<NVVM_Dialect, Tcgen05MMABlockScale,
 }
 
 def NVVM_Tcgen05MMABlockScaleOp : NVVM_Op<"tcgen05.mma.block_scale",
-                                          [NVVMRequiresSMa<[100, 110]>]> {
+    [NVVMRequiresSMaOrSMf<[100, 101, 110], [100, 101, 110]>]> {
   let summary = "Performs block scaled MMA operation on 5th-gen tensor cores";
 
   let description = [{
@@ -6240,7 +6250,7 @@ def NVVM_Tcgen05MMABlockScaleOp : NVVM_Op<"tcgen05.mma.block_scale",
 }
 
 def NVVM_Tcgen05MMASparseBlockScaleOp : NVVM_Op<"tcgen05.mma.sp.block_scale",
-                                                [NVVMRequiresSMa<[100, 110]>]> {
+    [NVVMRequiresSMaOrSMf<[100, 101, 110], [100, 101, 110]>]> {
   let summary = "Performs block scaled MMA operation with sparse A matrix on 5th-gen tensor cores";
 
   let description = [{
@@ -6322,7 +6332,7 @@ def Tcgen05MMACollectorBBufferAttr : EnumAttr<NVVM_Dialect, Tcgen05MMACollectorB
 }
 
 def NVVM_Tcgen05MMAWsOp : NVVM_Op<"tcgen05.mma.ws",
-                                  [NVVMRequiresSMa<[100, 110]>]> {
+    [NVVMRequiresSMaOrSMf<[100, 101, 110], [100, 101, 110]>]> {
   let summary = "Performs weight stationary convolution MMA operation on 5th-gen tensor cores";
 
   let description = [{
@@ -6392,7 +6402,7 @@ def NVVM_Tcgen05MMAWsOp : NVVM_Op<"tcgen05.mma.ws",
 }
 
 def NVVM_Tcgen05MMAWsSparseOp : NVVM_Op<"tcgen05.mma.ws.sp",
-                                        [NVVMRequiresSMa<[100, 110]>]> {
+    [NVVMRequiresSMaOrSMf<[100, 101, 110], [100, 101, 110]>]> {
   let summary = "Performs weight stationary convolution MMA with sparse A matrix on 5th-gen tensor cores";
 
   let description = [{

>From b7a5c821755b9fcfbbb93aebf86b5eba9930cf77 Mon Sep 17 00:00:00 2001
From: Srinivasa Ravi <srinivasar at nvidia.com>
Date: Fri, 17 Apr 2026 08:19:30 +0000
Subject: [PATCH 2/3] address comments

---
 mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 43 ++++++++-------------
 1 file changed, 17 insertions(+), 26 deletions(-)

diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index ec3820b174995..c892ee18166f2 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -5075,8 +5075,7 @@ def Tcgen05WaitKindAttr :
   let assemblyFormat = "`<` $value `>`";
 }
 
-def NVVM_Tcgen05AllocOp : NVVM_Op<"tcgen05.alloc",
-    [NVVMRequiresSMaOrSMf<[100, 101, 110], [100, 101, 110]>]> {
+def NVVM_Tcgen05AllocOp : NVVM_Op<"tcgen05.alloc", [NVVMRequiresSMf<[100, 101, 110]>]> {
   let summary = "Tcgen05 alloc operation";
   let description = [{
     The `tcgen05.alloc` Op allocates tensor core memory for
@@ -5106,8 +5105,7 @@ def NVVM_Tcgen05AllocOp : NVVM_Op<"tcgen05.alloc",
   }];
 }
 
-def NVVM_Tcgen05DeallocOp : NVVM_Op<"tcgen05.dealloc", 
-    [NVVMRequiresSMaOrSMf<[100, 101, 110], [100, 101, 110]>]> {
+def NVVM_Tcgen05DeallocOp : NVVM_Op<"tcgen05.dealloc", [NVVMRequiresSMf<[100, 101, 110]>]> {
   let summary = "Tcgen05 dealloc operation";
   let description = [{
     The `tcgen05.dealloc` Op de-allocates the tensor core memory
@@ -5136,7 +5134,7 @@ def NVVM_Tcgen05DeallocOp : NVVM_Op<"tcgen05.dealloc",
 }
 
 def NVVM_Tcgen05RelinquishAllocPermitOp : NVVM_Op<"tcgen05.relinquish_alloc_permit",
-    [NVVMRequiresSMaOrSMf<[100, 101, 110], [100, 101, 110]>]> {
+    [NVVMRequiresSMf<[100, 101, 110]>]> {
   let summary = "Tcgen05 Op to relinquish the right to allocate";
   let description = [{
     The `tcgen05.relinquish_alloc_permit` Op specifies that the CTA
@@ -5159,8 +5157,7 @@ def NVVM_Tcgen05RelinquishAllocPermitOp : NVVM_Op<"tcgen05.relinquish_alloc_perm
   }];
 }
 
-def NVVM_Tcgen05FenceOp : NVVM_Op<"tcgen05.fence", 
-    [NVVMRequiresSMaOrSMf<[100, 101, 110], [100, 101, 110]>]> {
+def NVVM_Tcgen05FenceOp : NVVM_Op<"tcgen05.fence", [NVVMRequiresSMf<[100, 101, 110]>]> {
   let summary = "Tcgen05 fence operations";
   let description = [{
     The `tcgen05.fence<before>` orders all prior async tcgen05 operations
@@ -5182,8 +5179,7 @@ def NVVM_Tcgen05FenceOp : NVVM_Op<"tcgen05.fence",
   }];
 }
 
-def NVVM_Tcgen05WaitOp : NVVM_Op<"tcgen05.wait",
-    [NVVMRequiresSMaOrSMf<[100, 101, 110], [100, 101, 110]>]> {
+def NVVM_Tcgen05WaitOp : NVVM_Op<"tcgen05.wait", [NVVMRequiresSMf<[100, 101, 110]>]> {
   let summary = "Tcgen05 wait operations";
   let description = [{
     The `tcgen05.wait<load>` causes the executing thread to block until
@@ -5205,8 +5201,7 @@ def NVVM_Tcgen05WaitOp : NVVM_Op<"tcgen05.wait",
   }];
 }
 
-def NVVM_Tcgen05CommitOp : NVVM_Op<"tcgen05.commit",
-    [NVVMRequiresSMaOrSMf<[100, 101, 110], [100, 101, 110]>]> {
+def NVVM_Tcgen05CommitOp : NVVM_Op<"tcgen05.commit", [NVVMRequiresSMf<[100, 101, 110]>]> {
   let summary = "Tcgen05 commit operations";
   let description = [{
     The `tcgen05.commit` makes the *mbarrier object*, specified by
@@ -5311,8 +5306,7 @@ def Tcgen05CpSrcFormatAttr : EnumAttr<NVVM_Dialect, Tcgen05CpSrcFormat, "tcgen05
   let assemblyFormat = "`<` $value `>`";
 }
 
-def NVVM_Tcgen05CpOp : NVVM_Op<"tcgen05.cp",
-    [NVVMRequiresSMaOrSMf<[100, 101, 110], [100, 101, 110]>]> {
+def NVVM_Tcgen05CpOp : NVVM_Op<"tcgen05.cp", [NVVMRequiresSMf<[100, 101, 110]>]> {
   let summary = "Tcgen05 copy operation";
   let description = [{
     Instruction tcgen05.cp initiates an asynchronous copy operation from
@@ -5448,8 +5442,7 @@ def Tcgen05LdStShapeAttr: EnumAttr<NVVM_Dialect, Tcgen05LdStShape, "tcgen05_ldst
 // NVVM tcgen05.ld Op
 //===----------------------------------------------------------------------===//
 
-def NVVM_Tcgen05LdOp : NVVM_Op<"tcgen05.ld",
-    [NVVMRequiresSMaOrSMf<[100, 101, 110], [100, 101, 110]>]> {
+def NVVM_Tcgen05LdOp : NVVM_Op<"tcgen05.ld", [NVVMRequiresSMf<[100, 101, 110]>]> {
   let summary = "tensor memory load instructions";
   let arguments = (ins
     // Attributes
@@ -5631,8 +5624,7 @@ def NVVM_Tcgen05LdRedOp : NVVM_Op<"tcgen05.ld.red",
 // NVVM tcgen05.st Op
 //===----------------------------------------------------------------------===//
 
-def NVVM_Tcgen05StOp : NVVM_Op<"tcgen05.st",
-    [NVVMRequiresSMaOrSMf<[100, 101, 110], [100, 101, 110]>]> {
+def NVVM_Tcgen05StOp : NVVM_Op<"tcgen05.st", [NVVMRequiresSMf<[100, 101, 110]>]> {
   let summary = "tensor memory store instructions";
   let arguments = (ins
     // Attributes
@@ -6011,8 +6003,7 @@ defvar Tcgen05MMABlockScaleKindAttr =
     [EnumAttrIsOneOf<Tcgen05MMAKindAttr, Tcgen05MMABlockScaleKindList>]>;
 
 def NVVM_Tcgen05MMAOp : NVVM_Op<"tcgen05.mma",
-    [AttrSizedOperandSegments,
-     NVVMRequiresSMaOrSMf<[100, 101, 110], [100, 101, 110]>]> {
+    [AttrSizedOperandSegments, NVVMRequiresSMf<[100, 101, 110]>]> {
   let summary = "Performs MMA operation on 5th-gen tensor cores";
 
   let description = [{
@@ -6095,8 +6086,7 @@ def NVVM_Tcgen05MMAOp : NVVM_Op<"tcgen05.mma",
 }
 
 def NVVM_Tcgen05MMASparseOp : NVVM_Op<"tcgen05.mma.sp",
-    [AttrSizedOperandSegments,
-     NVVMRequiresSMaOrSMf<[100, 101, 110], [100, 101, 110]>]> {
+    [AttrSizedOperandSegments, NVVMRequiresSMf<[100, 101, 110]>]> {
   let summary = "Performs MMA operation with sparse A matrix on 5th-gen tensor cores";
 
   let description = [{
@@ -6177,7 +6167,7 @@ def Tcgen05MMABlockScaleAttr : EnumAttr<NVVM_Dialect, Tcgen05MMABlockScale,
 }
 
 def NVVM_Tcgen05MMABlockScaleOp : NVVM_Op<"tcgen05.mma.block_scale",
-    [NVVMRequiresSMaOrSMf<[100, 101, 110], [100, 101, 110]>]> {
+    [NVVMRequiresSMf<[100, 101, 110]>]> {
   let summary = "Performs block scaled MMA operation on 5th-gen tensor cores";
 
   let description = [{
@@ -6250,7 +6240,7 @@ def NVVM_Tcgen05MMABlockScaleOp : NVVM_Op<"tcgen05.mma.block_scale",
 }
 
 def NVVM_Tcgen05MMASparseBlockScaleOp : NVVM_Op<"tcgen05.mma.sp.block_scale",
-    [NVVMRequiresSMaOrSMf<[100, 101, 110], [100, 101, 110]>]> {
+    [NVVMRequiresSMf<[100, 101, 110]>]> {
   let summary = "Performs block scaled MMA operation with sparse A matrix on 5th-gen tensor cores";
 
   let description = [{
@@ -6332,7 +6322,7 @@ def Tcgen05MMACollectorBBufferAttr : EnumAttr<NVVM_Dialect, Tcgen05MMACollectorB
 }
 
 def NVVM_Tcgen05MMAWsOp : NVVM_Op<"tcgen05.mma.ws",
-    [NVVMRequiresSMaOrSMf<[100, 101, 110], [100, 101, 110]>]> {
+    [NVVMRequiresSMf<[100, 101, 110]>]> {
   let summary = "Performs weight stationary convolution MMA operation on 5th-gen tensor cores";
 
   let description = [{
@@ -6402,7 +6392,7 @@ def NVVM_Tcgen05MMAWsOp : NVVM_Op<"tcgen05.mma.ws",
 }
 
 def NVVM_Tcgen05MMAWsSparseOp : NVVM_Op<"tcgen05.mma.ws.sp",
-    [NVVMRequiresSMaOrSMf<[100, 101, 110], [100, 101, 110]>]> {
+    [NVVMRequiresSMf<[100, 101, 110]>]> {
   let summary = "Performs weight stationary convolution MMA with sparse A matrix on 5th-gen tensor cores";
 
   let description = [{
@@ -6725,7 +6715,8 @@ def TensormapFieldValueAttr :
     TensormapSwizzleModeAttr, TensormapSwizzleAtomicityAttr, 
     TensormapFillModeAttr]>;
 
-def NVVM_TensormapReplaceOp : NVVM_VoidIntrinsicOp<"tensormap.replace"> {
+def NVVM_TensormapReplaceOp : NVVM_VoidIntrinsicOp<"tensormap.replace",
+    [NVVMRequiresSMaOrSMf<[90, 103], [100, 101, 110, 120]>]> {
   let summary = "Modifies a field of the tensor-map object";
   let description = [{
     The `nvvm.tensormap.replace` replaces the specified field of the tensor-map 

>From 4e6cbf3ae5fc7376e59e669a34c9f19d38cbbf49 Mon Sep 17 00:00:00 2001
From: Srinivasa Ravi <srinivasar at nvidia.com>
Date: Fri, 17 Apr 2026 11:26:10 +0000
Subject: [PATCH 3/3] add tests for updated Ops

---
 .../Dialect/LLVMIR/nvvm_check_target_sm.mlir  | 229 ++++++++++++++++++
 ...M.mlir => nvvm_check_target_sm_trait.mlir} |   0
 2 files changed, 229 insertions(+)
 create mode 100644 mlir/test/Dialect/LLVMIR/nvvm_check_target_sm.mlir
 rename mlir/test/Dialect/LLVMIR/{nvvm-check-targetSM.mlir => nvvm_check_target_sm_trait.mlir} (100%)

diff --git a/mlir/test/Dialect/LLVMIR/nvvm_check_target_sm.mlir b/mlir/test/Dialect/LLVMIR/nvvm_check_target_sm.mlir
new file mode 100644
index 0000000000000..ff90ad47ba410
--- /dev/null
+++ b/mlir/test/Dialect/LLVMIR/nvvm_check_target_sm.mlir
@@ -0,0 +1,229 @@
+// RUN: mlir-opt %s -split-input-file -verify-diagnostics
+
+gpu.module @tcgen05_alloc_sm90 [#nvvm.target<chip = "sm_90">] {
+  func.func @tcgen05_alloc_sm90(%addr: !llvm.ptr, %ncols: i32) {
+    // expected-error @below {{'nvvm.tcgen05.alloc' op is not supported on sm_90}}
+    nvvm.tcgen05.alloc %addr, %ncols : !llvm.ptr, i32
+    return
+  }
+}
+
+// -----
+
+gpu.module @tcgen05_alloc_sm100 [#nvvm.target<chip = "sm_100">] {
+  func.func @tcgen05_alloc_sm100(%addr: !llvm.ptr, %ncols: i32) {
+    // expected-error @below {{'nvvm.tcgen05.alloc' op is not supported on sm_100}}
+    nvvm.tcgen05.alloc %addr, %ncols : !llvm.ptr, i32
+    return
+  }
+}
+
+// -----
+
+gpu.module @tcgen05_dealloc_sm90a [#nvvm.target<chip = "sm_90a">] {
+  func.func @tcgen05_dealloc_sm90a(%taddr: !llvm.ptr<6>, %ncols: i32) {
+    // expected-error @below {{'nvvm.tcgen05.dealloc' op is not supported on sm_90a}}
+    nvvm.tcgen05.dealloc %taddr, %ncols : !llvm.ptr<6>, i32
+    return
+  }
+}
+
+// -----
+
+gpu.module @tcgen05_relinquish_alloc_permit_sm100 [#nvvm.target<chip = "sm_100">] {
+  func.func @tcgen05_relinquish_alloc_permit_sm100() {
+    // expected-error @below {{'nvvm.tcgen05.relinquish_alloc_permit' op is not supported on sm_100}}
+    nvvm.tcgen05.relinquish_alloc_permit
+    return
+  }
+}
+
+// -----
+
+gpu.module @tcgen05_fence_sm120f [#nvvm.target<chip = "sm_120f">] {
+  func.func @tcgen05_fence_sm120f() {
+    // expected-error @below {{'nvvm.tcgen05.fence' op is not supported on sm_120f}}
+    nvvm.tcgen05.fence #nvvm.tcgen05_fence<before>
+    return
+  }
+}
+
+// -----
+
+gpu.module @tcgen05_wait_sm90 [#nvvm.target<chip = "sm_90">] {
+  func.func @tcgen05_wait_sm90() {
+    // expected-error @below {{'nvvm.tcgen05.wait' op is not supported on sm_90}}
+    nvvm.tcgen05.wait #nvvm.tcgen05_wait<load>
+    return
+  }
+}
+
+// -----
+
+gpu.module @tcgen05_commit_sm100 [#nvvm.target<chip = "sm_100">] {
+  func.func @tcgen05_commit_sm100(%barrier: !llvm.ptr) {
+    // expected-error @below {{'nvvm.tcgen05.commit' op is not supported on sm_100}}
+    nvvm.tcgen05.commit %barrier : !llvm.ptr
+    return
+  }
+}
+
+// -----
+
+gpu.module @tcgen05_cp_sm90a [#nvvm.target<chip = "sm_90a">] {
+  func.func @tcgen05_cp_sm90a(%taddr: !llvm.ptr<6>, %sdesc: i64) {
+    // expected-error @below {{'nvvm.tcgen05.cp' op is not supported on sm_90a}}
+    nvvm.tcgen05.cp %taddr, %sdesc {shape = #nvvm.tcgen05_cp_shape<shape_128x256b>}
+    return
+  }
+}
+
+// -----
+
+gpu.module @tcgen05_ld_sm90 [#nvvm.target<chip = "sm_90">] {
+  func.func @tcgen05_ld_sm90(%taddr: !llvm.ptr<6>) {
+    // expected-error @below {{'nvvm.tcgen05.ld' op is not supported on sm_90}}
+    %0 = nvvm.tcgen05.ld %taddr {shape = #nvvm.tcgen05_ldst_shape<shape_16x64b>} : i32
+    return
+  }
+}
+
+// -----
+
+gpu.module @tcgen05_st_sm120f [#nvvm.target<chip = "sm_120f">] {
+  func.func @tcgen05_st_sm120f(%taddr: !llvm.ptr<6>, %val: i32) {
+    // expected-error @below {{'nvvm.tcgen05.st' op is not supported on sm_120f}}
+    nvvm.tcgen05.st %taddr, %val {shape = #nvvm.tcgen05_ldst_shape<shape_16x64b>} : i32
+    return
+  }
+}
+
+// -----
+
+gpu.module @tcgen05_mma_sm90 [#nvvm.target<chip = "sm_90">] {
+  func.func @tcgen05_mma_sm90(%d: !llvm.ptr<6>, %a: i64, %b: i64, %idesc: i32, %eid: i1) {
+    // expected-error @below {{'nvvm.tcgen05.mma' op is not supported on sm_90}}
+    nvvm.tcgen05.mma %d, %a, %b, %idesc, %eid {kind = #nvvm.tcgen05_mma_kind<f16>, ctaGroup = #nvvm.cta_group<cta_1>} : (!llvm.ptr<6>, i64, i64, i32, i1)
+    return
+  }
+}
+
+// -----
+
+gpu.module @tcgen05_mma_sp_sm100 [#nvvm.target<chip = "sm_100">] {
+  func.func @tcgen05_mma_sp_sm100(%d: !llvm.ptr<6>, %a: i64, %b: i64, %idesc: i32, %eid: i1, %sp: !llvm.ptr<6>) {
+    // expected-error @below {{'nvvm.tcgen05.mma.sp' op is not supported on sm_100}}
+    nvvm.tcgen05.mma.sp %d, %a, %b, %idesc, %eid, %sp {kind = #nvvm.tcgen05_mma_kind<f16>, ctaGroup = #nvvm.cta_group<cta_1>} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>)
+    return
+  }
+}
+
+// -----
+
+gpu.module @tcgen05_mma_block_scale_sm90a [#nvvm.target<chip = "sm_90a">] {
+  func.func @tcgen05_mma_block_scale_sm90a(%d: !llvm.ptr<6>, %a: i64, %b: i64, %idesc: i32, %eid: i1, %sa: !llvm.ptr<6>, %sb: !llvm.ptr<6>) {
+    // expected-error @below {{'nvvm.tcgen05.mma.block_scale' op is not supported on sm_90a}}
+    nvvm.tcgen05.mma.block_scale %d, %a, %b, %idesc, %eid, %sa, %sb {kind = #nvvm.tcgen05_mma_kind<mxf8f6f4>, ctaGroup = #nvvm.cta_group<cta_1>} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>)
+    return
+  }
+}
+
+// -----
+
+gpu.module @tcgen05_mma_sp_block_scale_sm90 [#nvvm.target<chip = "sm_90">] {
+  func.func @tcgen05_mma_sp_block_scale_sm90(%d: !llvm.ptr<6>, %a: i64, %b: i64, %idesc: i32, %eid: i1, %sp: !llvm.ptr<6>, %sa: !llvm.ptr<6>, %sb: !llvm.ptr<6>) {
+    // expected-error @below {{'nvvm.tcgen05.mma.sp.block_scale' op is not supported on sm_90}}
+    nvvm.tcgen05.mma.sp.block_scale %d, %a, %b, %idesc, %eid, %sp, %sa, %sb {kind = #nvvm.tcgen05_mma_kind<mxf8f6f4>, ctaGroup = #nvvm.cta_group<cta_1>} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>)
+    return
+  }
+}
+
+// -----
+
+gpu.module @tcgen05_mma_ws_sm120f [#nvvm.target<chip = "sm_120f">] {
+  func.func @tcgen05_mma_ws_sm120f(%d: !llvm.ptr<6>, %a: i64, %b: i64, %idesc: i32, %eid: i1) {
+    // expected-error @below {{'nvvm.tcgen05.mma.ws' op is not supported on sm_120f}}
+    nvvm.tcgen05.mma.ws %d, %a, %b, %idesc, %eid {kind = #nvvm.tcgen05_mma_kind<f16>} : (!llvm.ptr<6>, i64, i64, i32, i1)
+    return
+  }
+}
+
+// -----
+
+gpu.module @tcgen05_mma_ws_sp_sm90a [#nvvm.target<chip = "sm_90a">] {
+  func.func @tcgen05_mma_ws_sp_sm90a(%d: !llvm.ptr<6>, %a: i64, %b: i64, %idesc: i32, %eid: i1, %sp: !llvm.ptr<6>) {
+    // expected-error @below {{'nvvm.tcgen05.mma.ws.sp' op is not supported on sm_90a}}
+    nvvm.tcgen05.mma.ws.sp %d, %a, %b, %idesc, %eid, %sp {kind = #nvvm.tcgen05_mma_kind<f16>} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>)
+    return
+  }
+}
+
+// -----
+
+gpu.module @tcgen05_shift_sm90a [#nvvm.target<chip = "sm_90a">] {
+  func.func @tcgen05_shift_sm90a(%taddr: !llvm.ptr<6>) {
+    // expected-error @below {{'nvvm.tcgen05.shift' op is not supported on sm_90a}}
+    nvvm.tcgen05.shift %taddr : !llvm.ptr<6>
+    return
+  }
+}
+
+// -----
+
+gpu.module @tcgen05_shift_sm100f [#nvvm.target<chip = "sm_100f">] {
+  func.func @tcgen05_shift_sm100f(%taddr: !llvm.ptr<6>) {
+    // expected-error @below {{'nvvm.tcgen05.shift' op is not supported on sm_100f}}
+    nvvm.tcgen05.shift %taddr : !llvm.ptr<6>
+    return
+  }
+}
+
+// -----
+
+gpu.module @tcgen05_shift_sm100 [#nvvm.target<chip = "sm_100">] {
+  func.func @tcgen05_shift_sm100(%taddr: !llvm.ptr<6>) {
+    // expected-error @below {{'nvvm.tcgen05.shift' op is not supported on sm_100}}
+    nvvm.tcgen05.shift %taddr : !llvm.ptr<6>
+    return
+  }
+}
+
+// -----
+
+gpu.module @tcgen05_ld_red_sm100a [#nvvm.target<chip = "sm_100a">] {
+  func.func @tcgen05_ld_red_sm100a(%addr: !llvm.ptr<6>) {
+    // expected-error @below {{'nvvm.tcgen05.ld.red' op is not supported on sm_100a}}
+    %data, %rv = nvvm.tcgen05.ld.red min %addr {shape = #nvvm.tcgen05_ldst_shape<shape_32x32b>} : vector<2xi32>, i32
+    return
+  }
+}
+
+// -----
+
+gpu.module @tcgen05_ld_red_sm90a [#nvvm.target<chip = "sm_90a">] {
+  func.func @tcgen05_ld_red_sm90a(%addr: !llvm.ptr<6>) {
+    // expected-error @below {{'nvvm.tcgen05.ld.red' op is not supported on sm_90a}}
+    %data, %rv = nvvm.tcgen05.ld.red min %addr {shape = #nvvm.tcgen05_ldst_shape<shape_32x32b>} : vector<2xi32>, i32
+    return
+  }
+}
+
+// -----
+
+gpu.module @tensormap_replace_sm80 [#nvvm.target<chip = "sm_80">] {
+  func.func @tensormap_replace_sm80(%addr: !llvm.ptr<1>, %nv: i64) {
+    // expected-error @below {{'nvvm.tensormap.replace' op is not supported on sm_80}}
+    nvvm.tensormap.replace field = global_address, new_value = %nv in %addr : !llvm.ptr<1>, i64
+    return
+  }
+}
+
+// -----
+
+gpu.module @tensormap_replace_sm90 [#nvvm.target<chip = "sm_90">] {
+  func.func @tensormap_replace_sm90(%addr: !llvm.ptr<1>, %nv: i64) {
+    // expected-error @below {{'nvvm.tensormap.replace' op is not supported on sm_90}}
+    nvvm.tensormap.replace field = global_address, new_value = %nv in %addr : !llvm.ptr<1>, i64
+    return
+  }
+}
diff --git a/mlir/test/Dialect/LLVMIR/nvvm-check-targetSM.mlir b/mlir/test/Dialect/LLVMIR/nvvm_check_target_sm_trait.mlir
similarity index 100%
rename from mlir/test/Dialect/LLVMIR/nvvm-check-targetSM.mlir
rename to mlir/test/Dialect/LLVMIR/nvvm_check_target_sm_trait.mlir