[Mlir-commits] [mlir] [nvgpu][mlir] Fix wgmma store offset (PR #154581)

Wed Aug 20 10:34:32 PDT 2025

https://github.com/linuxlonelyeagle created https://github.com/llvm/llvm-project/pull/154581

When M is greater than 64 in wgmma, the accumulator type consists of multiple struct types. Computing the row offset into the destination memref from the number of elements in each struct type is incorrect; the offset should instead advance by `kWgmmaSizeM` for each inner struct. For fp16, this offset is 64.

>From d62a27ea20e4e4242e9c53d10df3aead1d7e4428 Mon Sep 17 00:00:00 2001
From: linuxlonelyeagle <2020382038 at qq.com>
Date: Wed, 20 Aug 2025 17:28:24 +0000
Subject: [PATCH] Fix wgmma store offset

---
 mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
index c6c5ab356f256..fffcb2aedafee 100644
--- a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
+++ b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
@@ -1623,11 +1623,10 @@ struct NVGPUWarpgroupMmaStoreOpLowering
     Value matriDValue = adaptor.getMatrixD();
     auto stype = cast<LLVM::LLVMStructType>(matriDValue.getType());
     for (auto [idx, matrixD] : llvm::enumerate(stype.getBody())) {
-      auto structType = cast<LLVM::LLVMStructType>(matrixD);
       Value innerStructValue =
           LLVM::ExtractValueOp::create(b, matriDValue, idx);
       storeFragmentedMatrix(b, innerStructValue, op.getDstMemref(), offset);
-      offset += structType.getBody().size();
+      offset += kWgmmaSizeM;
     }
     rewriter.eraseOp(op);
     return success();