[Mlir-commits] [mlir] [MLIR][NVVM] Fixed assertion failure for insufficient parsing validation of nvvm dialect MMAOp (PR #163432)

Wed Oct 15 08:39:43 PDT 2025

https://github.com/smada3 updated https://github.com/llvm/llvm-project/pull/163432

>From b3e8d2f54c51bda9d4fda8ea6d62b78a72bf1f6d Mon Sep 17 00:00:00 2001
From: Stefan Mada <smada at nvidia.com>
Date: Tue, 14 Oct 2025 18:39:01 +0000
Subject: [PATCH 1/2] Fixed assertion failure for insufficient parsing
 validation of nvvm dialect with MMAOp

---
 mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp  | 20 ++++++++++++++++++++
 mlir/test/Target/LLVMIR/nvvmir-invalid.mlir | 13 +++++++++++++
 2 files changed, 33 insertions(+)

diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index 5edcc40bd2d32..d785a8918cc4a 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -787,6 +787,26 @@ LogicalResult MmaOp::verify() {
                          " attribute");
   }
 
+  // Validate layout combinations. According to the operation description, most
+  // MMA operations require layoutA=row and layoutB=col. Only m8n8k4 with f16
+  // can use other layout combinations.
+  bool isM8N8K4_F16 =
+      (mmaShape[0] == 8 && mmaShape[1] == 8 && mmaShape[2] == 4 &&
+       getMultiplicandAPtxType() == MMATypes::f16);
+
+  if (!isM8N8K4_F16) {
+    // For all other shapes/types, layoutA must be row and layoutB must be col
+    if (getLayoutA() != MMALayout::row || getLayoutB() != MMALayout::col) {
+      return emitOpError("requires layoutA = #nvvm.mma_layout<row> and "
+                         "layoutB = #nvvm.mma_layout<col> for shape <")
+             << mmaShape[0] << ", " << mmaShape[1] << ", " << mmaShape[2]
+             << "> with element types "
+             << stringifyEnum(*getMultiplicandAPtxType()) << " and "
+             << stringifyEnum(*getMultiplicandBPtxType())
+             << ". Only m8n8k4 with f16 supports other layouts.";
+    }
+  }
+
   return success();
 }
 
diff --git a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
index 0b3615487716d..cf5a9782c69ab 100644
--- a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
@@ -559,3 +559,16 @@ llvm.func @clusterlaunchcontrol_query_cancel_get_first_cta_id_invalid_return_typ
   %res = nvvm.clusterlaunchcontrol.query.cancel query = get_first_cta_id_x, %try_cancel_response : i1
   llvm.return
 }
+
+// -----
+
+// Test that ensures invalid row/col layouts for matrices A and B are not accepted
+llvm.func @nvvm_mma_m16n8k32_s4_s4(%a0 : i32, %a1 : i32, %b0 : i32, %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) -> !llvm.struct<(i32,i32,i32,i32)> {
+  // expected-error at +1 {{Only m8n8k4 with f16 supports other layouts.}}
+  %0 = nvvm.mma.sync A[%a0, %a1] B[%b0] C[%c0, %c1, %c2, %c3]
+    {layoutA = #nvvm.mma_layout<col>, layoutB = #nvvm.mma_layout<col>,
+     multiplicandAPtxType = #nvvm.mma_type<s4>, multiplicandBPtxType = #nvvm.mma_type<s4>,
+     intOverflowBehavior=#nvvm.mma_int_overflow<satfinite>,
+     shape = #nvvm.shape<m = 16, n = 8, k = 32>} : (i32, i32, i32) -> !llvm.struct<(i32,i32,i32,i32)>
+  llvm.return %0 : !llvm.struct<(i32,i32,i32,i32)>
+}
\ No newline at end of file

>From a78ad8dc21da4405f2307467ffc34e47ea788953 Mon Sep 17 00:00:00 2001
From: Stefan Mada <smada at nvidia.com>
Date: Wed, 15 Oct 2025 15:39:23 +0000
Subject: [PATCH 2/2] Added newline at end of test file

---
 mlir/test/Target/LLVMIR/nvvmir-invalid.mlir | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
index cf5a9782c69ab..187a4adc4002e 100644
--- a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
@@ -571,4 +571,4 @@ llvm.func @nvvm_mma_m16n8k32_s4_s4(%a0 : i32, %a1 : i32, %b0 : i32, %c0 : i32, %
      intOverflowBehavior=#nvvm.mma_int_overflow<satfinite>,
      shape = #nvvm.shape<m = 16, n = 8, k = 32>} : (i32, i32, i32) -> !llvm.struct<(i32,i32,i32,i32)>
   llvm.return %0 : !llvm.struct<(i32,i32,i32,i32)>
-}
\ No newline at end of file
+}