[Mlir-commits] [mlir] f4d5952 - [mlir] Fix sm90 test for new verifier

Fri Nov 10 07:50:09 PST 2023

Author: Guray Ozen
Date: 2023-11-10T16:50:01+01:00
New Revision: f4d59522cff942f57578ac4c44b70a48298d5e7b

URL: https://github.com/llvm/llvm-project/commit/f4d59522cff942f57578ac4c44b70a48298d5e7b
DIFF: https://github.com/llvm/llvm-project/commit/f4d59522cff942f57578ac4c44b70a48298d5e7b.diff

LOG: [mlir] Fix sm90 test for new verifier

#70923 improved verifier. The verifier caught that the tensor map type in the tma descriptor in this test isn't correct. The program was working correctly anway since the offset is calculated correctly.

This work fixes the test.

Added: 
    

Modified: 
    mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x64_swizzle128b.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x64_swizzle128b.mlir b/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x64_swizzle128b.mlir
index 4ce8db0f2cba212..a078cf3a205468f 100644

--- a/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x64_swizzle128b.mlir
+++ b/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x64_swizzle128b.mlir
@@ -35,7 +35,7 @@
 
 !rhs = memref<64x128xf16>
 !shmemrhs = memref<64x128xf16, 3>
-!rhsTensorMap = !nvgpu.tensormap.descriptor<tensor = !shmemrhs, swizzle = swizzle_128b, l2promo=none, oob=zero, interleave=none>
+!rhsTensorMap = !nvgpu.tensormap.descriptor<tensor = memref<64x64xf16, 3>, swizzle = swizzle_128b, l2promo=none, oob=zero, interleave=none>
 
 module @mymod {
   func.func private @printMemrefF32(memref<*xf32>)
@@ -99,7 +99,8 @@ module @mymod {
       %6 = gpu.thread_id  x
       %lhsShmem = memref.get_global @bufferLhsGlobal : !shmemlhs
       %rhsShmem = memref.get_global @bufferRhsGlobal : !shmemrhs
-      %rhsShmem2 = memref.subview %rhsShmem[32, 0][128, 64][1, 1] : !shmemrhs to memref<128x64xf16, strided<[128, 1], offset: 4096>, 3>
+      %rhsShmem1 = memref.subview %rhsShmem[0, 0][64, 64][1, 1] : !shmemrhs to memref<64x64xf16, strided<[128, 1]>, 3>
+      %rhsShmem2 = memref.subview %rhsShmem[32, 0][64, 64][1, 1] : !shmemrhs to memref<64x64xf16, strided<[128, 1], offset: 4096>, 3>
     
       // Step 5. Initialize the mbarrier
       %9 = nvgpu.mbarrier.create -> !barrierType
@@ -110,8 +111,8 @@ module @mymod {
       scf.if %10 {
         gpu.printf "[GPU] TMA SIZE %d\0A" %c32768 : index
         nvgpu.tma.async.load %d_lhsTensorMap[%c0, %c0], %9[%c0] to %lhsShmem : !lhsTensorMap, !barrierType -> !shmemlhs
-        nvgpu.tma.async.load %d_rhsTensorMap[%c0, %c0], %9[%c0] to %rhsShmem : !rhsTensorMap, !barrierType -> !shmemrhs
-        nvgpu.tma.async.load %d_rhsTensorMap[%c64, %c0], %9[%c0] to %rhsShmem2 : !rhsTensorMap, !barrierType -> memref<128x64xf16, strided<[128, 1], offset: 4096>, 3>
+        nvgpu.tma.async.load %d_rhsTensorMap[%c0, %c0], %9[%c0] to %rhsShmem1 : !rhsTensorMap, !barrierType -> memref<64x64xf16, strided<[128, 1]>, 3>
+        nvgpu.tma.async.load %d_rhsTensorMap[%c64, %c0], %9[%c0] to %rhsShmem2 : !rhsTensorMap, !barrierType -> memref<64x64xf16, strided<[128, 1], offset: 4096>, 3>
         nvgpu.mbarrier.arrive.expect_tx %9[%c0], %c32768 : !barrierType
       } else {
         nvgpu.mbarrier.arrive.expect_tx %9[%c0], %c0 : !barrierType