[Mlir-commits] [mlir] d59ddba - [mlir] Fix gpu-to-llvm lowering for gpu.alloc with dynamic sizes.

Mon Jan 11 06:55:58 PST 2021

Author: Christian Sigg
Date: 2021-01-11T15:55:48+01:00
New Revision: d59ddba777251c59a667d03021900c72f9882a4b

URL: https://github.com/llvm/llvm-project/commit/d59ddba777251c59a667d03021900c72f9882a4b
DIFF: https://github.com/llvm/llvm-project/commit/d59ddba777251c59a667d03021900c72f9882a4b.diff

LOG: [mlir] Fix gpu-to-llvm lowering for gpu.alloc with dynamic sizes.

Reviewed By: ftynse

Differential Revision: https://reviews.llvm.org/D94402

Added: 
    

Modified: 
    mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h
    mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp
    mlir/lib/Conversion/SPIRVToLLVM/ConvertLaunchFuncToLLVMCalls.cpp
    mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
    mlir/test/Conversion/GPUCommon/lower-alloc-to-gpu-runtime-calls.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h b/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h
index 82cf7e772afe..357bd2f021b1 100644

--- a/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h
+++ b/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h
@@ -533,7 +533,7 @@ class ConvertToLLVMPattern : public ConversionPattern {
   ///                  : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
   /// `sizeBytes`  = llvm.ptrtoint %gep : !llvm.ptr<f32> to i64
   void getMemRefDescriptorSizes(Location loc, MemRefType memRefType,
-                                ArrayRef<Value> dynamicSizes,
+                                ValueRange dynamicSizes,
                                 ConversionPatternRewriter &rewriter,
                                 SmallVectorImpl<Value> &sizes,
                                 SmallVectorImpl<Value> &strides,

diff  --git a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp
index 8c89b6d20099..cee1d7ba20e3 100644
--- a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp
+++ b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp
@@ -373,19 +373,19 @@ LogicalResult ConvertAllocOpToGpuRuntimeCallPattern::matchAndRewrite(
     return failure();
 
   auto loc = allocOp.getLoc();
+  auto adaptor = gpu::AllocOpAdaptor(operands, allocOp->getAttrDictionary());
 
   // Get shape of the memref as values: static sizes are constant
   // values and dynamic sizes are passed to 'alloc' as operands.
   SmallVector<Value, 4> shape;
   SmallVector<Value, 4> strides;
   Value sizeBytes;
-  getMemRefDescriptorSizes(loc, memRefType, operands, rewriter, shape, strides,
-                           sizeBytes);
+  getMemRefDescriptorSizes(loc, memRefType, adaptor.dynamicSizes(), rewriter,
+                           shape, strides, sizeBytes);
 
   // Allocate the underlying buffer and store a pointer to it in the MemRef
   // descriptor.
   Type elementPtrType = this->getElementPtrType(memRefType);
-  auto adaptor = gpu::AllocOpAdaptor(operands, allocOp->getAttrDictionary());
   auto stream = adaptor.asyncDependencies().front();
   Value allocatedPtr =
       allocCallBuilder.create(loc, rewriter, {sizeBytes, stream}).getResult(0);

diff  --git a/mlir/lib/Conversion/SPIRVToLLVM/ConvertLaunchFuncToLLVMCalls.cpp b/mlir/lib/Conversion/SPIRVToLLVM/ConvertLaunchFuncToLLVMCalls.cpp
index 0978e8cf756a..604b4576fc1b 100644
--- a/mlir/lib/Conversion/SPIRVToLLVM/ConvertLaunchFuncToLLVMCalls.cpp
+++ b/mlir/lib/Conversion/SPIRVToLLVM/ConvertLaunchFuncToLLVMCalls.cpp
@@ -212,8 +212,8 @@ class GPULaunchLowering : public ConvertOpToLLVMPattern<gpu::LaunchFuncOp> {
       SmallVector<Value, 4> sizes;
       SmallVector<Value, 4> strides;
       Value sizeBytes;
-      getMemRefDescriptorSizes(loc, memRefType, operand.value(), rewriter,
-                               sizes, strides, sizeBytes);
+      getMemRefDescriptorSizes(loc, memRefType, {}, rewriter, sizes, strides,
+                               sizeBytes);
       MemRefDescriptor descriptor(operand.value());
       Value src = descriptor.allocatedPtr(rewriter, loc);
 

diff  --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
index 53ebd6721863..512273347e4b 100644
--- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
+++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
@@ -1083,11 +1083,14 @@ Type ConvertToLLVMPattern::getElementPtrType(MemRefType type) const {
 }
 
 void ConvertToLLVMPattern::getMemRefDescriptorSizes(
-    Location loc, MemRefType memRefType, ArrayRef<Value> dynamicSizes,
+    Location loc, MemRefType memRefType, ValueRange dynamicSizes,
     ConversionPatternRewriter &rewriter, SmallVectorImpl<Value> &sizes,
     SmallVectorImpl<Value> &strides, Value &sizeBytes) const {
   assert(isConvertibleAndHasIdentityMaps(memRefType) &&
          "layout maps must have been normalized away");
+  assert(count(memRefType.getShape(), ShapedType::kDynamicSize) ==
+             static_cast<ssize_t>(dynamicSizes.size()) &&
+         "dynamicSizes size doesn't match dynamic sizes count in memref shape");
 
   sizes.reserve(memRefType.getRank());
   unsigned dynamicIndex = 0;
@@ -4080,8 +4083,7 @@ SmallVector<Value, 4> LLVMTypeConverter::promoteOperands(Location loc,
         continue;
       }
       if (auto memrefType = operand.getType().dyn_cast<MemRefType>()) {
-        MemRefDescriptor::unpack(builder, loc, llvmOperand,
-                                 operand.getType().cast<MemRefType>(),
+        MemRefDescriptor::unpack(builder, loc, llvmOperand, memrefType,
                                  promotedOperands);
         continue;
       }

diff  --git a/mlir/test/Conversion/GPUCommon/lower-alloc-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-alloc-to-gpu-runtime-calls.mlir
index d1b1af813163..4169f0e8191d 100644
--- a/mlir/test/Conversion/GPUCommon/lower-alloc-to-gpu-runtime-calls.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-alloc-to-gpu-runtime-calls.mlir
@@ -1,16 +1,19 @@
 // RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s
 
 module attributes {gpu.container_module} {
-  func @main() {
+  // CHECK-LABEL: llvm.func @main
+  // CHECK-SAME: %[[size:.*]]: i64
+  func @main(%size : index) {
     // CHECK: %[[stream:.*]] = llvm.call @mgpuStreamCreate()
     %0 = gpu.wait async
-    // CHECK: %[[size_bytes:.*]] = llvm.ptrtoint
+    // CHECK: %[[gep:.*]] = llvm.getelementptr {{.*}}[%[[size]]]
+    // CHECK: %[[size_bytes:.*]] = llvm.ptrtoint %[[gep]]
     // CHECK: llvm.call @mgpuMemAlloc(%[[size_bytes]], %[[stream]])
-    %1, %2 = gpu.alloc async [%0] () : memref<13xf32>
+    %1, %2 = gpu.alloc async [%0] (%size) : memref<?xf32>
     // CHECK: %[[float_ptr:.*]] = llvm.extractvalue {{.*}}[0]
     // CHECK: %[[void_ptr:.*]] = llvm.bitcast %[[float_ptr]]
     // CHECK: llvm.call @mgpuMemFree(%[[void_ptr]], %[[stream]])
-    %3 = gpu.dealloc async [%2] %1 : memref<13xf32>
+    %3 = gpu.dealloc async [%2] %1 : memref<?xf32>
     // CHECK: llvm.call @mgpuStreamSynchronize(%[[stream]])
     // CHECK: llvm.call @mgpuStreamDestroy(%[[stream]])
     gpu.wait [%3]