[Mlir-commits] [mlir] [mlir][gpu] Introduce the `gpu.conditional_execution` op (PR #78013)

Mon Jan 15 04:35:29 PST 2024

fabianmcg wrote:

@grypp this op is meant to provide functionality similar to the following CUDA idiom:
```
__host__ __device__ int hostDevFn() {
#ifdef __CUDA_ARCH__
// cuda code
#else
// non-cuda code
#endif
}
```
Here's a better example of a use case with `gpu.launch`, where `@thread_id` returns the GPU thread ID when called from device code, or 0 when called from the host. The input code is:
```
func.func @thread_id() -> index {
  %val = gpu.conditional_execution device {
    %id = gpu.thread_id x
    gpu.yield %id: index
  } host {
    %id = arith.constant 0 : index
    gpu.yield %id: index
  } -> index
  return %val : index
}
func.func @launch(%host: memref<index>, %dev: memref<index, 1>) {
  %c1 = arith.constant 1 : index
  gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c1, %grid_y = %c1,
                                       %grid_z = %c1)
             threads(%tx, %ty, %tz) in (%block_x = %c1, %block_y = %c1,
                                        %block_z = %c1) {
    %id = func.call @thread_id() : () -> index
    memref.store %id, %dev[] : memref<index, 1>
    gpu.terminator
  }
  %id = func.call @thread_id() : () -> index
  memref.store %id, %host[] : memref<index>
  return
}
```
After applying `mlir-opt --gpu-kernel-outlining --gpu-resolve-conditional-execution --inline`, we obtain code in which the correct region has been selected depending on the context:
```
module attributes {gpu.container_module} {
  func.func @thread_id() -> index {
    %c0 = arith.constant 0 : index
    return %c0 : index
  }
  func.func @launch(%arg0: memref<index>, %arg1: memref<index, 1>) {
    %c0 = arith.constant 0 : index
    %c1 = arith.constant 1 : index
    gpu.launch_func  @launch_kernel::@launch_kernel blocks in (%c1, %c1, %c1) threads in (%c1, %c1, %c1)  args(%arg1 : memref<index, 1>)
    memref.store %c0, %arg0[] : memref<index>
    return
  }
  gpu.module @launch_kernel {
    gpu.func @launch_kernel(%arg0: memref<index, 1>) kernel attributes {gpu.known_block_size = array<i32: 1, 1, 1>, gpu.known_grid_size = array<i32: 1, 1, 1>} {
      %0 = func.call @thread_id() : () -> index
      memref.store %0, %arg0[] : memref<index, 1>
      gpu.return
    }
    func.func @thread_id() -> index {
      %0 = gpu.thread_id  x
      return %0 : index
    }
  }
}
```

https://github.com/llvm/llvm-project/pull/78013