[Mlir-commits] [mlir] [mlir]introduce UnrollScopeInterface and apply it to funcOp and gpu.launch Op. (PR #123904)
Krzysztof Drewniak
llvmlistbot at llvm.org
Fri Jan 24 14:00:00 PST 2025
krzysz00 wrote:
```mlir
// example
func.func @gpu_launch_unroll() {
%buf = gpu.alloc() : memref<2x4x2x2xf16, #gpu.address_space<global>>
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
gpu.launch blocks(%arg0, %arg1, %arg2) in (%arg6 = %c1, %arg7 = %c1, %arg8 = %c1) threads(%arg3, %arg4, %arg5) in (%arg9 = %c1, %arg10 = %c1, %arg11 = %c1) {
%cst = arith.constant dense<0.000000e+00> : vector<2x4x2x2xf16>
%0 = affine.for %arg12 = 0 to 2 iter_args(%arg13 = %cst) -> (vector<2x4x2x2xf16>) {
%1 = affine.for %arg14 = 0 to 4 iter_args(%arg15 = %arg13) -> (vector<2x4x2x2xf16>) {
%cst_0 = arith.constant dense<0.000000e+00> : vector<2x2xf16>
%2 = vector.insert %cst_0, %arg15 [%arg12, %arg14] : vector<2x2xf16> into vector<2x4x2x2xf16>
affine.yield %2 : vector<2x4x2x2xf16>
}
affine.yield %1 : vector<2x4x2x2xf16>
}
vector.transfer_write %0, %buf[%c0, %c0, %c0, %c0] {inbounds = [true, true, true, true]} : vector<2x4x2x2xf16>, memref<2x4x2x2xf16, #gpu.address_space<global>>
gpu.terminator
}
gpu.dealloc %buf : memref<2x4x2x2xf16, #gpu.address_space<global>>
return
}
```
If I run this through `mlir-opt -affine-loop-unroll="unroll-full" -canonicalize -cse -gpu-launch-sink-index-computations -gpu-kernel-outlining -canonicalize`, it gives:
```mlir
module attributes {gpu.container_module} {
func.func @gpu_launch_unroll() {
%c1 = arith.constant 1 : index
%memref = gpu.alloc () : memref<2x4x2x2xf16, #gpu.address_space<global>>
gpu.launch_func @gpu_launch_unroll_kernel::@gpu_launch_unroll_kernel blocks in (%c1, %c1, %c1) threads in (%c1, %c1, %c1) args(%memref : memref<2x4x2x2xf16, #gpu.address_space<global>>)
gpu.dealloc %memref : memref<2x4x2x2xf16, #gpu.address_space<global>>
return
}
gpu.module @gpu_launch_unroll_kernel {
gpu.func @gpu_launch_unroll_kernel(%arg0: memref<2x4x2x2xf16, #gpu.address_space<global>>) kernel attributes {known_block_size = array<i32: 1, 1, 1>, known_grid_size = array<i32: 1, 1, 1>} {
%cst = arith.constant dense<0.000000e+00> : vector<2x4x2x2xf16>
%c3 = arith.constant 3 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%cst_0 = arith.constant dense<0.000000e+00> : vector<2x2xf16>
%0 = affine.for %arg1 = 0 to 2 iter_args(%arg2 = %cst) -> (vector<2x4x2x2xf16>) {
%1 = vector.insert %cst_0, %arg2 [%arg1, %c0] : vector<2x2xf16> into vector<2x4x2x2xf16>
%2 = vector.insert %cst_0, %1 [%arg1, %c1] : vector<2x2xf16> into vector<2x4x2x2xf16>
%3 = vector.insert %cst_0, %2 [%arg1, %c2] : vector<2x2xf16> into vector<2x4x2x2xf16>
%4 = vector.insert %cst_0, %3 [%arg1, %c3] : vector<2x2xf16> into vector<2x4x2x2xf16>
affine.yield %4 : vector<2x4x2x2xf16>
}
vector.transfer_write %0, %arg0[%c0, %c0, %c0, %c0] {in_bounds = [true, true, true, true], inbounds = [true, true, true, true]} : vector<2x4x2x2xf16>, memref<2x4x2x2xf16, #gpu.address_space<global>>
gpu.return
}
}
}
```
https://github.com/llvm/llvm-project/pull/123904
More information about the Mlir-commits mailing list