[Mlir-commits] [mlir] [MLIR][NVVM] [NFC] Update test cmd-lines and doc links (PR #128207)

Fri Feb 21 09:27:32 PST 2025

https://github.com/durga4github created https://github.com/llvm/llvm-project/pull/128207

For the NVVM Dialect tests under Target/LLVMIR/nvvm/ dir,
we verify the lowering to the intrinsics using mlir-translate.
Remove the -verify-diagnostics option from the cmd-line
for these tests since all the verifier checks are tested through
the nvvmir-invalid.mlir file. Similarly, remove the split-input-file
option which is not relevant here.

Update a few remaining links in the NVVMOps.td file.
All the reference links follow the same style now.

Rename the tcgen05-barriers.mlir file to tcgen05-commit.mlir
and move the wait/fence tests to a separate file.

>From 77961662702afb121032889aa5dc0a80d0555001 Mon Sep 17 00:00:00 2001
From: Durgadoss R <durgadossr at nvidia.com>
Date: Fri, 21 Feb 2025 18:13:28 +0530
Subject: [PATCH] [MLIR][NVVM] [NFC] Update test cmd-lines and doc links

For the NVVM Dialect tests under Target/LLVMIR/nvvm/ dir,
we verify the lowering to the intrinsics using mlir-translate.
For these tests, remove the -verify-diagnostics option from
the cmd-line since all the verifier checks are tested through
the nvvmir-invalid.mlir file. Similarly, remove the
split-input-file option which is not relevant here.

Fix a few remaining links in the NVVMOps.td file.
All the reference links follow the same style now.

Rename the tcgen05-barriers.mlir file to tcgen05-commit.mlir
and move the wait/fence tests to a separate file.

Signed-off-by: Durgadoss R <durgadossr at nvidia.com>
---
 mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td   | 18 +++++--------
 mlir/test/Target/LLVMIR/nvvm/cvt_tf32.mlir    |  2 +-
 .../Target/LLVMIR/nvvm/tcgen05-alloc.mlir     |  3 +--
 ...en05-barriers.mlir => tcgen05-commit.mlir} | 25 +------------------
 mlir/test/Target/LLVMIR/nvvm/tcgen05-cp.mlir  |  2 +-
 .../LLVMIR/nvvm/tcgen05-fence-wait.mlir       | 23 +++++++++++++++++
 .../Target/LLVMIR/nvvm/tcgen05-shift.mlir     |  2 +-
 .../Target/LLVMIR/nvvm/tma_bulk_copy.mlir     |  3 +--
 .../test/Target/LLVMIR/nvvm/tma_prefetch.mlir |  2 +-
 .../Target/LLVMIR/nvvm/tma_store_reduce.mlir  |  2 +-
 10 files changed, 37 insertions(+), 45 deletions(-)
 rename mlir/test/Target/LLVMIR/nvvm/{tcgen05-barriers.mlir => tcgen05-commit.mlir} (66%)
 create mode 100644 mlir/test/Target/LLVMIR/nvvm/tcgen05-fence-wait.mlir

diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 0692e8e32dbf8..633e4aaba5462 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -2648,8 +2648,7 @@ def NVVM_Tcgen05AllocOp : NVVM_Op<"tcgen05.alloc"> {
     the amount specified by `nCols` and writes the destination
     address to the `addr` argument. The `nCols` operand specifies the
     number of columns to be allocated and it must be a power-of-two.
-    [For more information, refer to the PTX ISA]
-    (https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-memory-alloc-manage-instructions)
+    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-memory-alloc-manage-instructions)
   }];
 
   let arguments = (ins
@@ -2679,8 +2678,7 @@ def NVVM_Tcgen05DeallocOp : NVVM_Op<"tcgen05.dealloc"> {
     specified by `tmemAddr`, which must be from a previous tensor
     memory allocation. The `nCols` operand specifies the number
     of columns to be de-allocated, and it must be a power-of-two.
-    [For more information, refer to the PTX ISA]
-    (https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-memory-alloc-manage-instructions)
+    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-memory-alloc-manage-instructions)
   }];
 
   let arguments = (ins LLVM_PointerTensor:$taddr, I32:$nCols,
@@ -2708,8 +2706,7 @@ def NVVM_Tcgen05RelinquishAllocPermitOp : NVVM_Op<"tcgen05.relinquish_alloc_perm
     of the executing thread is relinquishing the right to allocate
     Tensor Memory. So, it is illegal for a CTA to perform `tcgen05.alloc`
     after any of its constituent threads execute `tcgen05.relinquish_alloc_permit`.
-    [For more information, refer to the PTX ISA]
-    (https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-memory-alloc-manage-instructions)
+    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-memory-alloc-manage-instructions)
   }];
 
   let arguments = (ins
@@ -2733,8 +2730,7 @@ def NVVM_Tcgen05FenceOp : NVVM_Op<"tcgen05.fence"> {
     The `tcgen05.fence<after>` orders all subsequent async tcgen05 operations
     with respect to the prior tcgen05 and execution ordering operations.
 
-    [For more information refer to the PTX ISA]
-    (https://docs.nvidia.com/cuda/parallel-thread-execution/#tensorcore-5th-generation-instructions-tcgen05-fence)
+    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tensorcore-5th-generation-instructions-tcgen05-fence)
   }];
 
   let arguments = (ins Tcgen05FenceKindAttr:$kind);
@@ -2756,8 +2752,7 @@ def NVVM_Tcgen05WaitOp : NVVM_Op<"tcgen05.wait"> {
     have completed. Similarly, the `tcgen05.wait<store>` causes the executing
     thread to block until all prior `tcgen05.st` operations issued by the
     executing thread have completed.
-    [For more information refer PTX ISA]
-    (https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-instructions-tcgen05-wait)
+    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-instructions-tcgen05-wait)
   }];
 
   let arguments = (ins Tcgen05WaitKindAttr:$kind);
@@ -2782,8 +2777,7 @@ def NVVM_Tcgen05CommitOp : NVVM_Op<"tcgen05.commit"> {
     when present, specifies the destination CTAs in the cluster such
     that each bit position in the 16-bit `multicastMask` operand
     corresponds to the `nvvm.read.ptx.sreg.ctaid` of the destination CTA.
-    [For more information refer PTX ISA]
-    (https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen-async-sync-operations-commit)
+    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen-async-sync-operations-commit)
   }];
 
   let arguments = (ins
diff --git a/mlir/test/Target/LLVMIR/nvvm/cvt_tf32.mlir b/mlir/test/Target/LLVMIR/nvvm/cvt_tf32.mlir
index ff7bad0149d4c..2bce9e1a5d3e4 100644
--- a/mlir/test/Target/LLVMIR/nvvm/cvt_tf32.mlir
+++ b/mlir/test/Target/LLVMIR/nvvm/cvt_tf32.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-translate -mlir-to-llvmir %s  -split-input-file --verify-diagnostics | FileCheck %s
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
 
 // CHECK-LABEL: @convert_float_to_tf32_rna
 llvm.func @convert_float_to_tf32_rna(%src : f32) -> i32 {
diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-alloc.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-alloc.mlir
index 781efa2567111..6a7e4ac515b81 100644
--- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-alloc.mlir
+++ b/mlir/test/Target/LLVMIR/nvvm/tcgen05-alloc.mlir
@@ -1,5 +1,4 @@
-// RUN: mlir-opt -split-input-file -verify-diagnostics %s
-// RUN: mlir-translate -mlir-to-llvmir -split-input-file -verify-diagnostics %s | FileCheck %s --check-prefix=CHECK-LLVM
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s --check-prefix=CHECK-LLVM
 
 // CHECK-LABEL: @llvm_nvvm_tcgen05_alloc
 llvm.func @llvm_nvvm_tcgen05_alloc(%addr : !llvm.ptr, %ncols : i32) {
diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-barriers.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-commit.mlir
similarity index 66%
rename from mlir/test/Target/LLVMIR/nvvm/tcgen05-barriers.mlir
rename to mlir/test/Target/LLVMIR/nvvm/tcgen05-commit.mlir
index 7536a4567e34e..80cf29f3704c2 100644
--- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-barriers.mlir
+++ b/mlir/test/Target/LLVMIR/nvvm/tcgen05-commit.mlir
@@ -1,27 +1,4 @@
-// RUN: mlir-opt -split-input-file -verify-diagnostics %s
-// RUN: mlir-translate -mlir-to-llvmir -split-input-file -verify-diagnostics %s | FileCheck %s --check-prefix=CHECK-LLVM
-
-// CHECK-LABEL: @llvm_nvvm_tcgen05_fence
-llvm.func @llvm_nvvm_tcgen05_fence() {
-  // CHECK-LLVM: call void @llvm.nvvm.tcgen05.fence.before.thread.sync()
-  nvvm.tcgen05.fence #nvvm.tcgen05_fence<before>
-
-  // CHECK-LLVM: call void @llvm.nvvm.tcgen05.fence.after.thread.sync()
-  nvvm.tcgen05.fence #nvvm.tcgen05_fence<after>
-
-  llvm.return
-}
-
-// CHECK-LABEL: @llvm_nvvm_tcgen05_wait
-llvm.func @llvm_nvvm_tcgen05_wait() {
-  // CHECK-LLVM: call void @llvm.nvvm.tcgen05.wait.ld()
-  nvvm.tcgen05.wait #nvvm.tcgen05_wait<load>
-
-  // CHECK-LLVM: call void @llvm.nvvm.tcgen05.wait.st()
-  nvvm.tcgen05.wait #nvvm.tcgen05_wait<store>
-
-  llvm.return
-}
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s --check-prefix=CHECK-LLVM
 
 // CHECK-LABEL: @llvm_nvvm_tcgen05_commit_generic
 llvm.func @llvm_nvvm_tcgen05_commit_generic(%barrier : !llvm.ptr, %cta_mask : i16) {
diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-cp.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-cp.mlir
index 91128cd00c873..bf72714d16de7 100644
--- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-cp.mlir
+++ b/mlir/test/Target/LLVMIR/nvvm/tcgen05-cp.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
 
 // CHECK-LABEL: @nvvm_tcgen05_cp_128x256b
 llvm.func @nvvm_tcgen05_cp_128x256b(%taddr : !llvm.ptr<6>, %smem_desc : i64) {
diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-fence-wait.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-fence-wait.mlir
new file mode 100644
index 0000000000000..ee4a517a4bffa
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/tcgen05-fence-wait.mlir
@@ -0,0 +1,23 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s --check-prefix=CHECK-LLVM
+
+// CHECK-LABEL: @llvm_nvvm_tcgen05_fence
+llvm.func @llvm_nvvm_tcgen05_fence() {
+  // CHECK-LLVM: call void @llvm.nvvm.tcgen05.fence.before.thread.sync()
+  nvvm.tcgen05.fence #nvvm.tcgen05_fence<before>
+
+  // CHECK-LLVM: call void @llvm.nvvm.tcgen05.fence.after.thread.sync()
+  nvvm.tcgen05.fence #nvvm.tcgen05_fence<after>
+
+  llvm.return
+}
+
+// CHECK-LABEL: @llvm_nvvm_tcgen05_wait
+llvm.func @llvm_nvvm_tcgen05_wait() {
+  // CHECK-LLVM: call void @llvm.nvvm.tcgen05.wait.ld()
+  nvvm.tcgen05.wait #nvvm.tcgen05_wait<load>
+
+  // CHECK-LLVM: call void @llvm.nvvm.tcgen05.wait.st()
+  nvvm.tcgen05.wait #nvvm.tcgen05_wait<store>
+
+  llvm.return
+}
diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-shift.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-shift.mlir
index 48753a3fdb21b..78c50cf96cf90 100644
--- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-shift.mlir
+++ b/mlir/test/Target/LLVMIR/nvvm/tcgen05-shift.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
 
 // CHECK-LABEL: @llvm_nvvm_tcgen05_shift
 llvm.func @llvm_nvvm_tcgen05_shift(%taddr : !llvm.ptr<6>) {
diff --git a/mlir/test/Target/LLVMIR/nvvm/tma_bulk_copy.mlir b/mlir/test/Target/LLVMIR/nvvm/tma_bulk_copy.mlir
index aa2d680f5117e..0e3f98a134491 100644
--- a/mlir/test/Target/LLVMIR/nvvm/tma_bulk_copy.mlir
+++ b/mlir/test/Target/LLVMIR/nvvm/tma_bulk_copy.mlir
@@ -1,5 +1,4 @@
-// RUN: mlir-opt -split-input-file -verify-diagnostics %s
-// RUN: mlir-translate -mlir-to-llvmir -split-input-file -verify-diagnostics %s | FileCheck %s
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
 
 // CHECK-LABEL: @llvm_nvvm_cp_async_bulk_global_to_shared_cluster
 llvm.func @llvm_nvvm_cp_async_bulk_global_to_shared_cluster(%dst : !llvm.ptr<3>, %src : !llvm.ptr<1>, %mbar : !llvm.ptr<3>, %size : i32, %mc : i16, %ch : i64) {
diff --git a/mlir/test/Target/LLVMIR/nvvm/tma_prefetch.mlir b/mlir/test/Target/LLVMIR/nvvm/tma_prefetch.mlir
index 7be29fd616a6f..f1fa3b61f2dd9 100644
--- a/mlir/test/Target/LLVMIR/nvvm/tma_prefetch.mlir
+++ b/mlir/test/Target/LLVMIR/nvvm/tma_prefetch.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-translate -mlir-to-llvmir %s  -split-input-file --verify-diagnostics | FileCheck %s
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
 
 // CHECK-LABEL: @tma_prefetch_1d
 llvm.func @tma_prefetch_1d(%tma_desc : !llvm.ptr, %d0 : i32, %ch : i64) {
diff --git a/mlir/test/Target/LLVMIR/nvvm/tma_store_reduce.mlir b/mlir/test/Target/LLVMIR/nvvm/tma_store_reduce.mlir
index 3809bc0bce897..6e0b48489e8b0 100644
--- a/mlir/test/Target/LLVMIR/nvvm/tma_store_reduce.mlir
+++ b/mlir/test/Target/LLVMIR/nvvm/tma_store_reduce.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-translate -mlir-to-llvmir -split-input-file --verify-diagnostics %s | FileCheck %s
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
 
 // CHECK-LABEL: define void @tma_store_reduce_1d(
 llvm.func @tma_store_reduce_1d(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr, %d0 : i32, %ch : i64) {