[Mlir-commits] [mlir] 56ffb8d - [mlir] Stop allowing LLVMType Int arguments for GPULaunchFuncOp.

Thu Sep 24 02:16:39 PDT 2020

Author: Alexander Belyaev
Date: 2020-09-24T11:16:23+02:00
New Revision: 56ffb8d1697932c1097fba3315dba6d9e55b7bb8

URL: https://github.com/llvm/llvm-project/commit/56ffb8d1697932c1097fba3315dba6d9e55b7bb8
DIFF: https://github.com/llvm/llvm-project/commit/56ffb8d1697932c1097fba3315dba6d9e55b7bb8.diff

LOG: [mlir] Stop allowing LLVMType Int arguments for GPULaunchFuncOp.

Conversion to LLVM becomes confusing and incorrect if someone tries to lower
STD -> LLVM and only then GPULaunchFuncOp to LLVM separately. Although it is
technically allowed now, it works incorrectly because of the argument
promotion. The correct way to use this conversion pattern is to add it to the
STD->LLVM patterns before running the pass.

Differential Revision: https://reviews.llvm.org/D88147

Added: 
    

Modified: 
    mlir/include/mlir/Dialect/GPU/GPUOps.td
    mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td
index d3d8cbac72b2..fd43065e9693 100644

--- a/mlir/include/mlir/Dialect/GPU/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td
@@ -18,12 +18,9 @@ include "mlir/Dialect/LLVMIR/LLVMOpBase.td"
 include "mlir/IR/SymbolInterfaces.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
 
-// Type constraint accepting standard integers, indices and wrapped LLVM integer
-// types.
-def IntLikeOrLLVMInt : TypeConstraint<
-  Or<[AnySignlessInteger.predicate, Index.predicate,
-      LLVM_AnyInteger.predicate]>,
-  "integer, index or LLVM dialect equivalent">;
+// Type constraint accepting standard integers, indices.
+def IntOrIndex : TypeConstraint<
+  Or<[AnySignlessInteger.predicate, Index.predicate]>, "integer or index">;
 
 //===----------------------------------------------------------------------===//
 // GPU Dialect operations.
@@ -299,9 +296,9 @@ def GPU_GPUFuncOp : GPU_Op<"func", [HasParent<"GPUModuleOp">,
 }
 
 def GPU_LaunchFuncOp : GPU_Op<"launch_func">,
-    Arguments<(ins IntLikeOrLLVMInt:$gridSizeX, IntLikeOrLLVMInt:$gridSizeY,
-               IntLikeOrLLVMInt:$gridSizeZ, IntLikeOrLLVMInt:$blockSizeX,
-               IntLikeOrLLVMInt:$blockSizeY, IntLikeOrLLVMInt:$blockSizeZ,
+    Arguments<(ins IntOrIndex:$gridSizeX, IntOrIndex:$gridSizeY,
+               IntOrIndex:$gridSizeZ, IntOrIndex:$blockSizeX,
+               IntOrIndex:$blockSizeY, IntOrIndex:$blockSizeZ,
                Variadic<AnyType>:$operands)>,
     Results<(outs)> {
   let summary = "Launches a function as a GPU kernel";
@@ -333,7 +330,7 @@ def GPU_LaunchFuncOp : GPU_Op<"launch_func">,
 
       // This module creates a separate compilation unit for the GPU compiler.
       gpu.module @kernels {
-        func @kernel_1(%arg0 : f32, %arg1 : !llvm<"float*">)
+        func @kernel_1(%arg0 : f32, %arg1 : memref<?xf32, 1>)
             attributes { nvvm.kernel = true } {
 
           // Operations that produce block/thread IDs and dimensions are
@@ -365,7 +362,7 @@ def GPU_LaunchFuncOp : GPU_Op<"launch_func">,
                         %arg0, %arg1)      // Arguments passed to the kernel.
             { kernel_module = @kernels,    // Module containing the kernel.
               kernel = "kernel_1" }        // Kernel function.
-            : (index, index, index, index, index, index, f32, !llvm<"float*">)
+            : (index, index, index, index, index, index, f32, memref<?xf32, 1>)
               -> ()
     }
     ```

diff  --git a/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir
index 2391b185bda8..8d0ba7cec1e7 100644
--- a/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir
@@ -3,34 +3,48 @@
 
 module attributes {gpu.container_module} {
 
-  // CHECK: llvm.mlir.global internal constant @[[kernel_name:.*]]("kernel\00")
-  // CHECK: llvm.mlir.global internal constant @[[global:.*]]("CUBIN")
-  // ROCDL: llvm.mlir.global internal constant @[[global:.*]]("HSACO")
+  // CHECK: llvm.mlir.global internal constant @[[KERNEL_NAME:.*]]("kernel\00")
+  // CHECK: llvm.mlir.global internal constant @[[GLOBAL:.*]]("CUBIN")
+  // ROCDL: llvm.mlir.global internal constant @[[GLOBAL:.*]]("HSACO")
 
-  gpu.module @kernel_module attributes {nvvm.cubin = "CUBIN", rocdl.hsaco = "HSACO"} {
-    llvm.func @kernel(%arg0: !llvm.float, %arg1: !llvm.ptr<float>) attributes {gpu.kernel} {
+  gpu.module @kernel_module attributes {
+      nvvm.cubin = "CUBIN", rocdl.hsaco = "HSACO"
+  } {
+    llvm.func @kernel(%arg0: !llvm.i32, %arg1: !llvm.ptr<float>,
+        %arg2: !llvm.ptr<float>, %arg3: !llvm.i64, %arg4: !llvm.i64,
+        %arg5: !llvm.i64) attributes {gpu.kernel} {
       llvm.return
     }
   }
 
-  llvm.func @foo() {
-    %0 = "op"() : () -> (!llvm.float)
-    %1 = "op"() : () -> (!llvm.ptr<float>)
-    %cst = llvm.mlir.constant(8 : index) : !llvm.i64
-
-    // CHECK: %[[addressof:.*]] = llvm.mlir.addressof @[[global]]
-    // CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : index)
-    // CHECK: %[[binary:.*]] = llvm.getelementptr %[[addressof]][%[[c0]], %[[c0]]]
-    // CHECK-SAME: -> !llvm.ptr<i8>
-    // CHECK: %[[module:.*]] = llvm.call @mgpuModuleLoad(%[[binary]]) : (!llvm.ptr<i8>) -> !llvm.ptr<i8>
-    // CHECK: %[[func:.*]] = llvm.call @mgpuModuleGetFunction(%[[module]], {{.*}}) : (!llvm.ptr<i8>, !llvm.ptr<i8>) -> !llvm.ptr<i8>
-    // CHECK: llvm.call @mgpuStreamCreate
-    // CHECK: llvm.call @mgpuLaunchKernel
-    // CHECK: llvm.call @mgpuStreamSynchronize
-    "gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) { kernel = @kernel_module::@kernel }
-        : (!llvm.i64, !llvm.i64, !llvm.i64, !llvm.i64, !llvm.i64, !llvm.i64, !llvm.float, !llvm.ptr<float>) -> ()
-
-    llvm.return
+  func @foo(%buffer: memref<?xf32>) {
+    %c8 = constant 8 : index
+    %c32 = constant 32 : i32
+    "gpu.launch_func"(%c8, %c8, %c8, %c8, %c8, %c8, %c32, %buffer) {
+      kernel = @kernel_module::@kernel
+    } : (index, index, index, index, index, index, i32, memref<?xf32>) -> ()
+    return
   }
 
+  // CHECK: [[C8:%.*]] = llvm.mlir.constant(8 : index) : !llvm.i64   
+  // CHECK: [[ADDRESSOF:%.*]] = llvm.mlir.addressof @[[GLOBAL]]
+  // CHECK: [[C0:%.*]] = llvm.mlir.constant(0 : index)
+  // CHECK: [[BINARY:%.*]] = llvm.getelementptr [[ADDRESSOF]]{{\[}}[[C0]], [[C0]]]
+  // CHECK-SAME: -> !llvm.ptr<i8>
+
+  // CHECK: [[MODULE:%.*]] = llvm.call @mgpuModuleLoad([[BINARY]])
+  // CHECK: [[FUNC:%.*]] = llvm.call @mgpuModuleGetFunction([[MODULE]], {{.*}})
+
+  // CHECK: [[C0_I32:%.*]] = llvm.mlir.constant(0 : i32)
+  // CHECK: [[STREAM:%.*]] = llvm.call @mgpuStreamCreate
+
+  // CHECK: [[NUM_PARAMS:%.*]] = llvm.mlir.constant(6 : i32) : !llvm.i32
+  // CHECK-NEXT: [[PARAMS:%.*]] = llvm.alloca [[NUM_PARAMS]] x !llvm.ptr<i8>
+
+  // CHECK: [[EXTRA_PARAMS:%.*]] = llvm.mlir.null : !llvm.ptr<ptr<i8>>
+
+  // CHECK: llvm.call @mgpuLaunchKernel([[FUNC]], [[C8]], [[C8]], [[C8]],
+  // CHECK-SAME: [[C8]], [[C8]], [[C8]], [[C0_I32]], [[STREAM]],
+  // CHECK-SAME: [[PARAMS]], [[EXTRA_PARAMS]])
+  // CHECK: llvm.call @mgpuStreamSynchronize
 }