[Mlir-commits] [mlir] [MLIR] Cleanup Pass Pipeline in sm_90 Integration Tests (PR #67416)

Tue Sep 26 04:48:28 PDT 2023

https://github.com/grypp created https://github.com/llvm/llvm-project/pull/67416

MLIR has begun supporting many features of NVIDIA's sm_90 architecture, and new integration tests have been added for them. Although the tests pass, their pass pipelines contain redundant passes. This PR removes the unnecessary passes.

From 4955bd8c3379802d0cb92bcbb7d338b6042863ab Mon Sep 17 00:00:00 2001
From: Guray Ozen <guray.ozen at gmail.com>
Date: Tue, 26 Sep 2023 13:46:59 +0200
Subject: [PATCH] [MLIR] Cleanup Pass Pipeline in sm_90 Integration Tests

MLIR has begun supporting many features of Nvidia's sm_90 architecture, and new tests have been added for it. Although the tests worked well, there were redundancies in the pipeline. This PR cleans up unnecessary passes.
---
 .../sm90/tma_load_128x64_swizzle128b.mlir     | 30 ++++++--------
 .../CUDA/sm90/tma_load_64x64_swizzle128b.mlir | 41 +++++++------------
 .../sm90/tma_load_64x8_8x128_noswizzle.mlir   |  3 --
 3 files changed, 27 insertions(+), 47 deletions(-)

diff --git a/mlir/test/Integration/GPU/CUDA/sm90/tma_load_128x64_swizzle128b.mlir b/mlir/test/Integration/GPU/CUDA/sm90/tma_load_128x64_swizzle128b.mlir
index 6e32eb147d499ef..c9538ea3e6af531 100644
--- a/mlir/test/Integration/GPU/CUDA/sm90/tma_load_128x64_swizzle128b.mlir
+++ b/mlir/test/Integration/GPU/CUDA/sm90/tma_load_128x64_swizzle128b.mlir
@@ -1,20 +1,16 @@
-// RUN: mlir-opt %s --convert-nvgpu-to-nvvm \
-// RUN:         -convert-linalg-to-loops \
-// RUN:         -gpu-kernel-outlining \
-// RUN:         -convert-vector-to-scf  \
-// RUN:         -lower-affine \
-// RUN:         -convert-scf-to-cf \
-// RUN:         -convert-nvvm-to-llvm \
-// RUN:         -convert-nvgpu-to-nvvm \
-// RUN:         -convert-scf-to-cf  \
-// RUN:         -convert-vector-to-llvm \
-// RUN:         -convert-math-to-llvm \
-// RUN:         -convert-index-to-llvm=index-bitwidth=32 \
-// RUN:         -convert-arith-to-llvm \
-// RUN:         -finalize-memref-to-llvm='use-opaque-pointers=1' \
-// RUN:         -convert-func-to-llvm \
-// RUN:         -canonicalize -cse \
-// RUN:         -expand-strided-metadata --nvvm-attach-target="module=main_kernel features=+ptx80 chip=sm_90 O=3" \
+// RUN: mlir-opt %s \
+// RUN:    -convert-nvgpu-to-nvvm \
+// RUN:    -gpu-kernel-outlining \
+// RUN:    -convert-vector-to-scf  \
+// RUN:    -convert-scf-to-cf \
+// RUN:    -convert-nvvm-to-llvm \
+// RUN:    -convert-vector-to-llvm \
+// RUN:    -convert-index-to-llvm=index-bitwidth=32 \
+// RUN:    -convert-arith-to-llvm \
+// RUN:    -finalize-memref-to-llvm='use-opaque-pointers=1' \
+// RUN:    -convert-func-to-llvm \
+// RUN:    -canonicalize -cse \
+// RUN:    -expand-strided-metadata --nvvm-attach-target="module=main_kernel features=+ptx80 chip=sm_90 O=3" \
 // RUN:  | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,convert-index-to-llvm{index-bitwidth=32},canonicalize,cse))' \
 // RUN:  | mlir-opt --gpu-to-llvm --gpu-module-to-binary=format=%gpu_compilation_format -canonicalize -cse -reconcile-unrealized-casts \
 // RUN:  | mlir-cpu-runner \
diff --git a/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x64_swizzle128b.mlir b/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x64_swizzle128b.mlir
index 140b0c2ecd77aa7..c75be107ca4c276 100644
--- a/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x64_swizzle128b.mlir
+++ b/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x64_swizzle128b.mlir
@@ -1,30 +1,17 @@
-// RUN: mlir-opt %s --convert-nvgpu-to-nvvm \
-// RUN:         -convert-linalg-to-loops \
-// RUN:         -canonicalize -cse \
-// RUN:         -gpu-kernel-outlining \
-// RUN:         -canonicalize -cse \
-// RUN:         -convert-vector-to-scf  \
-// RUN:         -canonicalize -cse \
-// RUN:         -lower-affine \
-// RUN:         -canonicalize -cse \
-// RUN:         -convert-scf-to-cf \
-// RUN:         -canonicalize -cse \
-// RUN:         -convert-nvvm-to-llvm \
-// RUN:         -canonicalize -cse \
-// RUN:         -convert-nvgpu-to-nvvm \
-// RUN:         -canonicalize -cse \
-// RUN:         -convert-scf-to-cf  \
-// RUN:         -convert-vector-to-llvm \
-// RUN:         -canonicalize -cse \
-// RUN:         -convert-math-to-llvm \
-// RUN:         -canonicalize -cse \
-// RUN:         -lower-affine \
-// RUN:         -convert-index-to-llvm=index-bitwidth=32 \
-// RUN:         -convert-arith-to-llvm \
-// RUN:         -finalize-memref-to-llvm='use-opaque-pointers=1' \
-// RUN:         -convert-func-to-llvm \
-// RUN:         -canonicalize -cse \
-// RUN:         -expand-strided-metadata --nvvm-attach-target="module=main_kernel features=+ptx80 chip=sm_90 O=3" \
+// RUN: mlir-opt %s \
+// RUN:    -convert-nvgpu-to-nvvm \
+// RUN:    -canonicalize -cse \
+// RUN:    -gpu-kernel-outlining \
+// RUN:    -convert-vector-to-scf  \
+// RUN:    -convert-scf-to-cf \
+// RUN:    -convert-nvvm-to-llvm \
+// RUN:    -convert-vector-to-llvm \
+// RUN:    -convert-index-to-llvm=index-bitwidth=32 \
+// RUN:    -convert-arith-to-llvm \
+// RUN:    -finalize-memref-to-llvm='use-opaque-pointers=1' \
+// RUN:    -convert-func-to-llvm \
+// RUN:    -canonicalize -cse \
+// RUN:    -expand-strided-metadata --nvvm-attach-target="module=main_kernel features=+ptx80 chip=sm_90 O=3" \
 // RUN:  | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,convert-index-to-llvm{index-bitwidth=32},canonicalize,cse))' \
 // RUN:  | mlir-opt --gpu-to-llvm --gpu-module-to-binary -canonicalize -cse -reconcile-unrealized-casts \
 // RUN:  | mlir-cpu-runner \
diff --git a/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x8_8x128_noswizzle.mlir b/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x8_8x128_noswizzle.mlir
index 760ded16556ff8f..11cf63548a551bb 100644
--- a/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x8_8x128_noswizzle.mlir
+++ b/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x8_8x128_noswizzle.mlir
@@ -1,7 +1,6 @@
 // RUN: mlir-opt %s --convert-nvgpu-to-nvvm \
 // RUN:         -gpu-kernel-outlining \
 // RUN:         -convert-nvvm-to-llvm \
-// RUN:         -convert-nvgpu-to-nvvm \
 // RUN:         -convert-scf-to-cf  \
 // RUN:         -convert-vector-to-llvm \
 // RUN:         -convert-index-to-llvm=index-bitwidth=32 \
@@ -25,7 +24,6 @@
 // RUN: mlir-opt %s --convert-nvgpu-to-nvvm \
 // RUN:         -gpu-kernel-outlining \
 // RUN:         -convert-nvvm-to-llvm \
-// RUN:         -convert-nvgpu-to-nvvm \
 // RUN:         -convert-scf-to-cf  \
 // RUN:         -convert-vector-to-llvm \
 // RUN:         -convert-index-to-llvm=index-bitwidth=32 \
@@ -41,7 +39,6 @@
 // RUN:   --entry-point-result=void \
 // RUN:  | FileCheck %s
 
-
 // CHECK: [GPU] TMA BEFORE lhs[45][7] 0.000000
 // CHECK: [GPU] TMA BEFORE rhs[7][0] 0.000000
 // CHECK: [GPU] TMA LOADED lhs[45][7] 7.000000