[Mlir-commits] [mlir] 8031a08 - [MLIR] Run the TMA test for sm_90
Guray Ozen
llvmlistbot at llvm.org
Mon Sep 4 09:15:42 PDT 2023
Author: Guray Ozen
Date: 2023-09-04T18:15:37+02:00
New Revision: 8031a088eb404ccd7490a589ecaa0fb76e73d2ef
URL: https://github.com/llvm/llvm-project/commit/8031a088eb404ccd7490a589ecaa0fb76e73d2ef
DIFF: https://github.com/llvm/llvm-project/commit/8031a088eb404ccd7490a589ecaa0fb76e73d2ef.diff
LOG: [MLIR] Run the TMA test for sm_90
TMA support was introduced to MLIR; however, it required the `ptxas` compiler. The recent work in D154117 introduced that. This patch runs the existing integration test.
Reviewed By: fmorac
Differential Revision: https://reviews.llvm.org/D159347
Added:
Modified:
mlir/test/Integration/GPU/CUDA/sm90/tmaload.mlir
Removed:
################################################################################
diff --git a/mlir/test/Integration/GPU/CUDA/sm90/tmaload.mlir b/mlir/test/Integration/GPU/CUDA/sm90/tmaload.mlir
index ff7500e6b8ba31f..92a15c06e30d998 100644
--- a/mlir/test/Integration/GPU/CUDA/sm90/tmaload.mlir
+++ b/mlir/test/Integration/GPU/CUDA/sm90/tmaload.mlir
@@ -10,7 +10,7 @@
// RUN: -convert-func-to-llvm \
// RUN: -canonicalize \
// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,convert-nvgpu-to-nvvm{use-opaque-pointers=1},lower-affine,convert-scf-to-cf,convert-vector-to-llvm,convert-math-to-llvm,expand-strided-metadata,lower-affine,convert-index-to-llvm{index-bitwidth=32},convert-arith-to-llvm,reconcile-unrealized-casts,gpu-to-cubin{chip=sm_90 features=+ptx80 dump-ptx}))' \
-// RUN: 2&>1 | FileCheck %s --check-prefixes=CHECK-PTX
+// RUN: 2>&1 | FileCheck %s --check-prefixes=CHECK-PTX
// CHECK-PTX: mbarrier.init.shared.b64
// CHECK-PTX: mbarrier.arrive.expect_tx.shared.b64
@@ -19,6 +19,31 @@
// CHECK-PTX: mbarrier.arrive.expect_tx.shared.b64
// CHECK-PTX: mbarrier.try_wait.parity.shared.b64
+// RUN: mlir-opt %s --convert-nvgpu-to-nvvm \
+// RUN: -gpu-kernel-outlining \
+// RUN: -convert-nvvm-to-llvm \
+// RUN: -convert-nvgpu-to-nvvm \
+// RUN: -convert-scf-to-cf \
+// RUN: -convert-vector-to-llvm \
+// RUN: -convert-index-to-llvm=index-bitwidth=32 \
+// RUN: -convert-arith-to-llvm \
+// RUN: -finalize-memref-to-llvm='use-opaque-pointers=1' \
+// RUN: -convert-func-to-llvm \
+// RUN: -expand-strided-metadata --nvvm-attach-target="module=main_kernel features=+ptx80 chip=sm_90 O=3" \
+// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,convert-index-to-llvm{index-bitwidth=32},canonicalize,cse))' \
+// RUN: | mlir-opt --gpu-to-llvm --gpu-module-to-binary -canonicalize -cse -reconcile-unrealized-casts \
+// RUN: | mlir-cpu-runner \
+// RUN: --shared-libs=%mlir_cuda_runtime \
+// RUN: --shared-libs=%mlir_runner_utils \
+// RUN: --entry-point-result=void \
+// RUN: | FileCheck %s
+
+
+// CHECK: [GPU] TMA BEFORE lhs[45][7] 0.000000
+// CHECK: [GPU] TMA BEFORE rhs[7][0] 0.000000
+// CHECK: [GPU] TMA LOADED lhs[45][7] 7.000000
+// CHECK: [GPU] TMA LOADED rhs[7][0] 3.000000
+
module @mymod {
memref.global "private" @bufferLhsGlobal : memref<64x8xf32, 3>
memref.global "private" @bufferRhsGlobal : memref<8x128xf32, 3>
@@ -87,4 +112,4 @@ module @mymod {
}
return
}
-}
\ No newline at end of file
+}