[Mlir-commits] [mlir] aaddca1 - [mlir][GPUToSPIRV] Modify the lowering of gpu.block_dim to be consistent with Vulkan SPEC
llvmlistbot at llvm.org
Sat Feb 8 22:31:20 PST 2020
Author: MaheshRavishankar
Date: 2020-02-08T22:30:03-08:00
New Revision: aaddca1efd1d5a66d014023b9649cb273a84a7ae
URL: https://github.com/llvm/llvm-project/commit/aaddca1efd1d5a66d014023b9649cb273a84a7ae
DIFF: https://github.com/llvm/llvm-project/commit/aaddca1efd1d5a66d014023b9649cb273a84a7ae.diff
LOG: [mlir][GPUToSPIRV] Modify the lowering of gpu.block_dim to be consistent with Vulkan SPEC
The existing lowering of gpu.block_dim added a global variable with
the WorkGroupSize decoration. This raises an error during
Vulkan/SPIR-V validation, since Vulkan requires such a variable to
have a constant initializer, which is not yet supported in the SPIR-V
dialect. Changing the lowering to instead return the workgroup size
as a constant value, obtained from the spv.entry_point_abi attribute,
gets around the issue for now. Validation then passes because the
workgroup size is specified using the spv.execution_mode operation.
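
For illustration, a minimal sketch of the new behavior, assuming a
kernel that carries the spv.entry_point_abi attribute (the attribute
syntax and the example kernel below follow the SPIR-V dialect
conventions of this period and are assumptions, not copied from the
patch):

  // Hypothetical kernel whose ABI attribute declares a local
  // workgroup size of 32x4x1:
  gpu.func @kernel() attributes {
      gpu.kernel,
      spv.entry_point_abi = {local_size = dense<[32, 4, 1]> : vector<3xi32>}} {
    // gpu.block_dim asks for the block size along dimension "x".
    %0 = "gpu.block_dim"() {dimension = "x"} : () -> index
    gpu.return
  }

  // After conversion, the op is replaced by a plain constant taken
  // from the attribute, instead of a load from a WorkgroupSize
  // builtin variable:
  //   %0 = spv.constant 32 : i32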
Added:
Modified:
mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp
mlir/test/Conversion/GPUToSPIRV/builtins.mlir
mlir/test/Conversion/GPUToSPIRV/load-store.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp b/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp
index 53bf72a9a842..6a5da3f4e38a 100644
--- a/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp
+++ b/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp
@@ -68,6 +68,19 @@ class LaunchConfigConversion : public SPIRVOpLowering<SourceOp> {
ConversionPatternRewriter &rewriter) const override;
};
+/// This is separate because in Vulkan workgroup size is exposed to shaders via
+/// a constant with WorkgroupSize decoration. So here we cannot generate a
+/// builtin variable; instead the information in the `spv.entry_point_abi`
+/// attribute on the surrounding FuncOp is used to replace the gpu::BlockDimOp.
+class WorkGroupSizeConversion : public SPIRVOpLowering<gpu::BlockDimOp> {
+public:
+ using SPIRVOpLowering<gpu::BlockDimOp>::SPIRVOpLowering;
+
+ PatternMatchResult
+ matchAndRewrite(gpu::BlockDimOp op, ArrayRef<Value> operands,
+ ConversionPatternRewriter &rewriter) const override;
+};
+
/// Pattern to convert a kernel function in GPU dialect within a spv.module.
class KernelFnConversion final : public SPIRVOpLowering<gpu::GPUFuncOp> {
public:
@@ -240,34 +253,54 @@ IfOpConversion::matchAndRewrite(loop::IfOp ifOp, ArrayRef<Value> operands,
// Builtins.
//===----------------------------------------------------------------------===//
-template <typename SourceOp, spirv::BuiltIn builtin>
-PatternMatchResult LaunchConfigConversion<SourceOp, builtin>::matchAndRewrite(
- SourceOp op, ArrayRef<Value> operands,
- ConversionPatternRewriter &rewriter) const {
- auto dimAttr =
- op.getOperation()->template getAttrOfType<StringAttr>("dimension");
+static Optional<int32_t> getLaunchConfigIndex(Operation *op) {
+ auto dimAttr = op->getAttrOfType<StringAttr>("dimension");
if (!dimAttr) {
- return this->matchFailure();
+ return {};
}
- int32_t index = 0;
if (dimAttr.getValue() == "x") {
- index = 0;
+ return 0;
} else if (dimAttr.getValue() == "y") {
- index = 1;
+ return 1;
} else if (dimAttr.getValue() == "z") {
- index = 2;
- } else {
- return this->matchFailure();
+ return 2;
}
+ return {};
+}
+
+template <typename SourceOp, spirv::BuiltIn builtin>
+PatternMatchResult LaunchConfigConversion<SourceOp, builtin>::matchAndRewrite(
+ SourceOp op, ArrayRef<Value> operands,
+ ConversionPatternRewriter &rewriter) const {
+ auto index = getLaunchConfigIndex(op);
+ if (!index)
+ return this->matchFailure();
// SPIR-V invocation builtin variables are a vector of type <3xi32>
auto spirvBuiltin = spirv::getBuiltinVariableValue(op, builtin, rewriter);
rewriter.replaceOpWithNewOp<spirv::CompositeExtractOp>(
op, rewriter.getIntegerType(32), spirvBuiltin,
- rewriter.getI32ArrayAttr({index}));
+ rewriter.getI32ArrayAttr({index.getValue()}));
return this->matchSuccess();
}
+PatternMatchResult WorkGroupSizeConversion::matchAndRewrite(
+ gpu::BlockDimOp op, ArrayRef<Value> operands,
+ ConversionPatternRewriter &rewriter) const {
+ auto index = getLaunchConfigIndex(op);
+ if (!index)
+ return matchFailure();
+
+ auto workGroupSizeAttr = spirv::lookupLocalWorkGroupSize(op);
+ auto val = workGroupSizeAttr.getValue<int32_t>(index.getValue());
+ auto convertedType = typeConverter.convertType(op.getResult().getType());
+ if (!convertedType)
+ return matchFailure();
+ rewriter.replaceOpWithNewOp<spirv::ConstantOp>(
+ op, convertedType, IntegerAttr::get(convertedType, val));
+ return matchSuccess();
+}
+
//===----------------------------------------------------------------------===//
// GPUFuncOp
//===----------------------------------------------------------------------===//
@@ -401,13 +434,11 @@ void mlir::populateGPUToSPIRVPatterns(MLIRContext *context,
populateWithGenerated(context, &patterns);
patterns.insert<KernelFnConversion>(context, typeConverter, workGroupSize);
patterns.insert<
- ForOpConversion, GPUReturnOpConversion, IfOpConversion,
- GPUModuleConversion,
- GPUReturnOpConversion, ForOpConversion, GPUModuleConversion,
- LaunchConfigConversion<gpu::BlockDimOp, spirv::BuiltIn::WorkgroupSize>,
+ ForOpConversion, GPUModuleConversion, GPUReturnOpConversion,
+ IfOpConversion,
LaunchConfigConversion<gpu::BlockIdOp, spirv::BuiltIn::WorkgroupId>,
LaunchConfigConversion<gpu::GridDimOp, spirv::BuiltIn::NumWorkgroups>,
LaunchConfigConversion<gpu::ThreadIdOp,
spirv::BuiltIn::LocalInvocationId>,
- TerminatorOpConversion>(context, typeConverter);
+ TerminatorOpConversion, WorkGroupSizeConversion>(context, typeConverter);
}
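
As a complement, a hedged sketch of the surrounding SPIR-V module
after ABI lowering, showing why validation succeeds (the
spv.ExecutionMode form is assumed from the dialect of this period,
not taken from this commit):

  spv.module "Logical" "GLSL450" {
    // ... converted kernel body, now containing spv.constant 32 : i32 ...
    spv.EntryPoint "GLCompute" @kernel
    // The workgroup size is declared via an execution mode, which
    // Vulkan validation accepts, rather than via a
    // WorkgroupSize-decorated global variable that would need a
    // constant initializer.
    spv.ExecutionMode @kernel "LocalSize", 32, 4, 1
  }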
diff --git a/mlir/test/Conversion/GPUToSPIRV/builtins.mlir b/mlir/test/Conversion/GPUToSPIRV/builtins.mlir
index 7f4081e4eda0..6df86d2be56f 100644
--- a/mlir/test/Conversion/GPUToSPIRV/builtins.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/builtins.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -split-input-file -convert-gpu-to-spirv %s -o - | FileCheck %s
+// RUN: mlir-opt -split-input-file -pass-pipeline='convert-gpu-to-spirv{workgroup-size=32,4}' %s -o - | FileCheck %s
module attributes {gpu.container_module} {
func @builtin() {
@@ -77,13 +77,11 @@ module attributes {gpu.container_module} {
}
// CHECK-LABEL: spv.module "Logical" "GLSL450"
- // CHECK: spv.globalVariable [[WORKGROUPSIZE:@.*]] built_in("WorkgroupSize")
gpu.module @kernels {
gpu.func @builtin_workgroup_size_x()
attributes {gpu.kernel} {
- // CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPSIZE]]
- // CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
- // CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}}
+ // The constant value is obtained from the command line option above.
+ // CHECK: spv.constant 32 : i32
%0 = "gpu.block_dim"() {dimension = "x"} : () -> index
gpu.return
}
@@ -92,6 +90,48 @@ module attributes {gpu.container_module} {
// -----
+module attributes {gpu.container_module} {
+ func @builtin() {
+ %c0 = constant 1 : index
+ "gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = "builtin_workgroup_size_y", kernel_module = @kernels} : (index, index, index, index, index, index) -> ()
+ return
+ }
+
+ // CHECK-LABEL: spv.module "Logical" "GLSL450"
+ gpu.module @kernels {
+ gpu.func @builtin_workgroup_size_y()
+ attributes {gpu.kernel} {
+ // The constant value is obtained from the command line option above.
+ // CHECK: spv.constant 4 : i32
+ %0 = "gpu.block_dim"() {dimension = "y"} : () -> index
+ gpu.return
+ }
+ }
+}
+
+// -----
+
+module attributes {gpu.container_module} {
+ func @builtin() {
+ %c0 = constant 1 : index
+ "gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = "builtin_workgroup_size_z", kernel_module = @kernels} : (index, index, index, index, index, index) -> ()
+ return
+ }
+
+ // CHECK-LABEL: spv.module "Logical" "GLSL450"
+ gpu.module @kernels {
+ gpu.func @builtin_workgroup_size_z()
+ attributes {gpu.kernel} {
+ // The constant value is obtained from the command line option above (1 is the default).
+ // CHECK: spv.constant 1 : i32
+ %0 = "gpu.block_dim"() {dimension = "z"} : () -> index
+ gpu.return
+ }
+ }
+}
+
+// -----
+
module attributes {gpu.container_module} {
func @builtin() {
%c0 = constant 1 : index
diff --git a/mlir/test/Conversion/GPUToSPIRV/load-store.mlir b/mlir/test/Conversion/GPUToSPIRV/load-store.mlir
index 446c0d602ed3..919c90981573 100644
--- a/mlir/test/Conversion/GPUToSPIRV/load-store.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/load-store.mlir
@@ -17,7 +17,6 @@ module attributes {gpu.container_module} {
// CHECK-LABEL: spv.module "Logical" "GLSL450"
gpu.module @kernels {
- // CHECK-DAG: spv.globalVariable [[WORKGROUPSIZEVAR:@.*]] built_in("WorkgroupSize") : !spv.ptr<vector<3xi32>, Input>
// CHECK-DAG: spv.globalVariable [[NUMWORKGROUPSVAR:@.*]] built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
// CHECK-DAG: spv.globalVariable [[LOCALINVOCATIONIDVAR:@.*]] built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
// CHECK-DAG: spv.globalVariable [[WORKGROUPIDVAR:@.*]] built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>