[Mlir-commits] [mlir] ddd6acd - [mlir][GPU] Expand LLVM function attribute copies (#76755)

Wed Jan 3 12:28:20 PST 2024

Author: Krzysztof Drewniak
Date: 2024-01-03T14:28:15-06:00
New Revision: ddd6acd7a8e9296544bce49af7178f3eeb318e61

URL: https://github.com/llvm/llvm-project/commit/ddd6acd7a8e9296544bce49af7178f3eeb318e61
DIFF: https://github.com/llvm/llvm-project/commit/ddd6acd7a8e9296544bce49af7178f3eeb318e61.diff

LOG: [mlir][GPU] Expand LLVM function attribute copies (#76755)

Expand the copying of attributes on GPU kernel arguments during LLVM
lowering.

Support copying attributes from values that are already LLVM pointers.

Support copying attributes, like `noundef`, that aren't specific to (the
pointer parts of) arguments.

Added: 
    

Modified: 
    mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
    mlir/test/Conversion/GPUCommon/memref-arg-attrs.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
index e79a02f931af2d..6a005e67ca95ba 100644

--- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
+++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
@@ -26,9 +26,8 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
 
   SmallVector<LLVM::GlobalOp, 3> workgroupBuffers;
   workgroupBuffers.reserve(gpuFuncOp.getNumWorkgroupAttributions());
-  for (const auto &en : llvm::enumerate(gpuFuncOp.getWorkgroupAttributions())) {
-    BlockArgument attribution = en.value();
-
+  for (const auto [idx, attribution] :
+       llvm::enumerate(gpuFuncOp.getWorkgroupAttributions())) {
     auto type = dyn_cast<MemRefType>(attribution.getType());
     assert(type && type.hasStaticShape() && "unexpected type in attribution");
 
@@ -37,12 +36,12 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
     auto elementType =
         cast<Type>(typeConverter->convertType(type.getElementType()));
     auto arrayType = LLVM::LLVMArrayType::get(elementType, numElements);
-    std::string name = std::string(
-        llvm::formatv("__wg_{0}_{1}", gpuFuncOp.getName(), en.index()));
+    std::string name =
+        std::string(llvm::formatv("__wg_{0}_{1}", gpuFuncOp.getName(), idx));
     uint64_t alignment = 0;
     if (auto alignAttr =
             dyn_cast_or_null<IntegerAttr>(gpuFuncOp.getWorkgroupAttributionAttr(
-                en.index(), LLVM::LLVMDialect::getAlignAttrName())))
+                idx, LLVM::LLVMDialect::getAlignAttrName())))
       alignment = alignAttr.getInt();
     auto globalOp = rewriter.create<LLVM::GlobalOp>(
         gpuFuncOp.getLoc(), arrayType, /*isConstant=*/false,
@@ -105,8 +104,7 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
     rewriter.setInsertionPointToStart(&gpuFuncOp.front());
     unsigned numProperArguments = gpuFuncOp.getNumArguments();
 
-    for (const auto &en : llvm::enumerate(workgroupBuffers)) {
-      LLVM::GlobalOp global = en.value();
+    for (const auto [idx, global] : llvm::enumerate(workgroupBuffers)) {
       auto ptrType = LLVM::LLVMPointerType::get(rewriter.getContext(),
                                                 global.getAddrSpace());
       Value address = rewriter.create<LLVM::AddressOfOp>(
@@ -119,18 +117,18 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
       // existing memref infrastructure. This may use more registers than
       // otherwise necessary given that memref sizes are fixed, but we can try
       // and canonicalize that away later.
-      Value attribution = gpuFuncOp.getWorkgroupAttributions()[en.index()];
+      Value attribution = gpuFuncOp.getWorkgroupAttributions()[idx];
       auto type = cast<MemRefType>(attribution.getType());
       auto descr = MemRefDescriptor::fromStaticShape(
           rewriter, loc, *getTypeConverter(), type, memory);
-      signatureConversion.remapInput(numProperArguments + en.index(), descr);
+      signatureConversion.remapInput(numProperArguments + idx, descr);
     }
 
     // Rewrite private memory attributions to alloca'ed buffers.
     unsigned numWorkgroupAttributions = gpuFuncOp.getNumWorkgroupAttributions();
     auto int64Ty = IntegerType::get(rewriter.getContext(), 64);
-    for (const auto &en : llvm::enumerate(gpuFuncOp.getPrivateAttributions())) {
-      Value attribution = en.value();
+    for (const auto [idx, attribution] :
+         llvm::enumerate(gpuFuncOp.getPrivateAttributions())) {
       auto type = cast<MemRefType>(attribution.getType());
       assert(type && type.hasStaticShape() && "unexpected type in attribution");
 
@@ -145,14 +143,14 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
       uint64_t alignment = 0;
       if (auto alignAttr =
               dyn_cast_or_null<IntegerAttr>(gpuFuncOp.getPrivateAttributionAttr(
-                  en.index(), LLVM::LLVMDialect::getAlignAttrName())))
+                  idx, LLVM::LLVMDialect::getAlignAttrName())))
         alignment = alignAttr.getInt();
       Value allocated = rewriter.create<LLVM::AllocaOp>(
           gpuFuncOp.getLoc(), ptrType, elementType, numElements, alignment);
       auto descr = MemRefDescriptor::fromStaticShape(
           rewriter, loc, *getTypeConverter(), type, allocated);
       signatureConversion.remapInput(
-          numProperArguments + numWorkgroupAttributions + en.index(), descr);
+          numProperArguments + numWorkgroupAttributions + idx, descr);
     }
   }
 
@@ -169,15 +167,16 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
   if (getTypeConverter()->getOptions().useBarePtrCallConv) {
     OpBuilder::InsertionGuard guard(rewriter);
     rewriter.setInsertionPointToStart(&llvmFuncOp.getBody().front());
-    for (const auto &en : llvm::enumerate(gpuFuncOp.getArgumentTypes())) {
-      auto memrefTy = dyn_cast<MemRefType>(en.value());
+    for (const auto [idx, argTy] :
+         llvm::enumerate(gpuFuncOp.getArgumentTypes())) {
+      auto memrefTy = dyn_cast<MemRefType>(argTy);
       if (!memrefTy)
         continue;
       assert(memrefTy.hasStaticShape() &&
              "Bare pointer convertion used with dynamically-shaped memrefs");
       // Use a placeholder when replacing uses of the memref argument to prevent
       // circular replacements.
-      auto remapping = signatureConversion.getInputMapping(en.index());
+      auto remapping = signatureConversion.getInputMapping(idx);
       assert(remapping && remapping->size == 1 &&
              "Type converter should produce 1-to-1 mapping for bare memrefs");
       BlockArgument newArg =
@@ -193,19 +192,23 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
 
   // Get memref type from function arguments and set the noalias to
   // pointer arguments.
-  for (const auto &en : llvm::enumerate(gpuFuncOp.getArgumentTypes())) {
-    auto memrefTy = en.value().dyn_cast<MemRefType>();
-    NamedAttrList argAttr = argAttrs
-                                ? argAttrs[en.index()].cast<DictionaryAttr>()
-                                : NamedAttrList();
-
+  for (const auto [idx, argTy] :
+       llvm::enumerate(gpuFuncOp.getArgumentTypes())) {
+    auto remapping = signatureConversion.getInputMapping(idx);
+    NamedAttrList argAttr =
+        argAttrs ? argAttrs[idx].cast<DictionaryAttr>() : NamedAttrList();
+    auto copyAttribute = [&](StringRef attrName) {
+      Attribute attr = argAttr.erase(attrName);
+      if (!attr)
+        return;
+      for (size_t i = 0, e = remapping->size; i < e; ++i)
+        llvmFuncOp.setArgAttr(remapping->inputNo + i, attrName, attr);
+    };
     auto copyPointerAttribute = [&](StringRef attrName) {
       Attribute attr = argAttr.erase(attrName);
 
-      // This is a proxy for the bare pointer calling convention.
       if (!attr)
         return;
-      auto remapping = signatureConversion.getInputMapping(en.index());
       if (remapping->size > 1 &&
           attrName == LLVM::LLVMDialect::getNoAliasAttrName()) {
         emitWarning(llvmFuncOp.getLoc(),
@@ -224,10 +227,23 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
     if (argAttr.empty())
       continue;
 
-    if (memrefTy) {
+    copyAttribute(LLVM::LLVMDialect::getReturnedAttrName());
+    copyAttribute(LLVM::LLVMDialect::getNoUndefAttrName());
+    copyAttribute(LLVM::LLVMDialect::getInRegAttrName());
+    bool lowersToPointer = false;
+    for (size_t i = 0, e = remapping->size; i < e; ++i) {
+      lowersToPointer |= isa<LLVM::LLVMPointerType>(
+          llvmFuncOp.getArgument(remapping->inputNo + i).getType());
+    }
+
+    if (lowersToPointer) {
       copyPointerAttribute(LLVM::LLVMDialect::getNoAliasAttrName());
+      copyPointerAttribute(LLVM::LLVMDialect::getNoCaptureAttrName());
+      copyPointerAttribute(LLVM::LLVMDialect::getNoFreeAttrName());
+      copyPointerAttribute(LLVM::LLVMDialect::getAlignAttrName());
       copyPointerAttribute(LLVM::LLVMDialect::getReadonlyAttrName());
       copyPointerAttribute(LLVM::LLVMDialect::getWriteOnlyAttrName());
+      copyPointerAttribute(LLVM::LLVMDialect::getReadnoneAttrName());
       copyPointerAttribute(LLVM::LLVMDialect::getNonNullAttrName());
       copyPointerAttribute(LLVM::LLVMDialect::getDereferenceableAttrName());
       copyPointerAttribute(

diff  --git a/mlir/test/Conversion/GPUCommon/memref-arg-attrs.mlir b/mlir/test/Conversion/GPUCommon/memref-arg-attrs.mlir
index 33374984eb7c91..e7c742067b4eb5 100644
--- a/mlir/test/Conversion/GPUCommon/memref-arg-attrs.mlir
+++ b/mlir/test/Conversion/GPUCommon/memref-arg-attrs.mlir
@@ -24,6 +24,17 @@ gpu.module @kernel {
 // ROCDL-SAME:  !llvm.ptr {llvm.writeonly}
 //  NVVM-SAME:  !llvm.ptr {llvm.writeonly}
 
+// -----
+
+gpu.module @kernel {
+  gpu.func @test_func_readonly_ptr(%arg0 : !llvm.ptr {llvm.readonly} ) {
+    gpu.return
+  }
+}
+
+// CHECK-LABEL:  llvm.func @test_func_readonly_ptr
+// ROCDL-SAME:  !llvm.ptr {llvm.readonly}
+//  NVVM-SAME:  !llvm.ptr {llvm.readonly}
 
 // -----
 
@@ -62,3 +73,17 @@ gpu.module @kernel {
 // CHECK-LABEL:  llvm.func @test_func_dereferenceable_or_null
 // ROCDL-SAME:  !llvm.ptr {llvm.dereferenceable_or_null = 4 : i64}
 //  NVVM-SAME:  !llvm.ptr {llvm.dereferenceable_or_null = 4 : i64}
+
+// -----
+
+gpu.module @kernel {
+  gpu.func @test_func_noundef(%arg0 : memref<f32> {llvm.noundef} ) {
+    gpu.return
+  }
+}
+
+// CHECK-LABEL:  llvm.func @test_func_noundef
+// ROCDL-SAME:  !llvm.ptr {llvm.noundef}
+// ROCDL-SAME:  i64 {llvm.noundef}
+//  NVVM-SAME:  !llvm.ptr {llvm.noundef}
+//  NVVM-SAME:  i64 {llvm.noundef}