[Mlir-commits] [mlir] 94058c4 - [mlir][GPU] Allow specifying alignment of memory attributions

Krzysztof Drewniak llvmlistbot at llvm.org
Wed May 3 14:51:20 PDT 2023


Author: Krzysztof Drewniak
Date: 2023-05-03T21:51:15Z
New Revision: 94058c41d43a4aae0e0914b151c166f2729affbe

URL: https://github.com/llvm/llvm-project/commit/94058c41d43a4aae0e0914b151c166f2729affbe
DIFF: https://github.com/llvm/llvm-project/commit/94058c41d43a4aae0e0914b151c166f2729affbe.diff

LOG: [mlir][GPU] Allow specifying alignment of memory attributions

Add support for argument attributes on workgroup and private
attributions for GPU functions. These arguments are outside the range
of getNumArguments() and get printed separately, so the default
mechanism for function argument attributes can't be used on them.

Having done this, check for the `llvm.align` attribute on workgroup or
private attributions in a `gpu.func` and pass it through to the
relevant allocation op (creating a global or alloca). This allows
people creating kernels that use multiple workgroup buffers to set an
alignment.

(This could, in the future, be a GPU dialect `alignment` attribute,
but I've taken the simpler route of using the LLVM version instead,
because I don't know how this might impact backends like Vulkan.)

Reviewed By: nirvedhmeshram

Differential Revision: https://reviews.llvm.org/D148965

Added: 
    

Modified: 
    mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
    mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
    mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
    mlir/test/Conversion/GPUCommon/memory-attrbution.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index e67adbc73f929..12e09654191da 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -252,7 +252,9 @@ def GPU_GPUFuncOp : GPU_Op<"func", [
 
   let arguments = (ins TypeAttrOf<FunctionType>:$function_type,
                        OptionalAttr<DictArrayAttr>:$arg_attrs,
-                       OptionalAttr<DictArrayAttr>:$res_attrs);
+                       OptionalAttr<DictArrayAttr>:$res_attrs,
+                       OptionalAttr<DictArrayAttr>:$workgroup_attrib_attrs,
+                       OptionalAttr<DictArrayAttr>:$private_attrib_attrs);
   let regions = (region AnyRegion:$body);
 
   let skipDefaultBuilders = 1;
@@ -279,11 +281,17 @@ def GPU_GPUFuncOp : GPU_Op<"func", [
       return attr ? attr.getInt() : 0;
     }
 
+    /// Return the index of the first workgroup attribution in the block argument
+    /// list.
+    unsigned getFirstWorkgroupAttributionIndex() {
+      return getFunctionType().getNumInputs();
+    }
+
     /// Returns a list of block arguments that correspond to buffers located in
     /// the workgroup memory
     ArrayRef<BlockArgument> getWorkgroupAttributions() {
       auto begin =
-          std::next(getBody().args_begin(), getFunctionType().getNumInputs());
+          std::next(getBody().args_begin(), getFirstWorkgroupAttributionIndex());
       auto end = std::next(begin, getNumWorkgroupAttributions());
       return {begin, end};
     }
@@ -292,20 +300,47 @@ def GPU_GPUFuncOp : GPU_Op<"func", [
     /// workgroup memory.
     BlockArgument addWorkgroupAttribution(Type type, Location loc);
 
+    /// Get the workgroup attribution attribute dictionary for the attribution
+    /// at index `index`, counted from the start of the workgroup attributions.
+    DictionaryAttr getworkgroupAttributionAttrs(unsigned index);
+
+    /// Set the workgroup attribution attribute dictionary for the attribution
+    /// at index `index`, counted from the start of the workgroup attributions.
+    void setworkgroupAttributionAttrs(unsigned index, DictionaryAttr value);
+
+    /// Get an attribute for a workgroup attribution. `index` is counted
+    /// from the start of the workgroup attributions, not the start of the block.
+    Attribute getWorkgroupAttributionAttr(unsigned index, StringAttr name);
+    Attribute getWorkgroupAttributionAttr(unsigned index, StringRef name) {
+      return getWorkgroupAttributionAttr(index, StringAttr::get((*this)->getContext(), name));
+    }
+
+    /// Set an attribute for a workgroup attribution. `index` is counted
+    /// from the start of the workgroup attributions, not the start of the block.
+    /// A null `value` removes an attribution attribute.
+    void setWorkgroupAttributionAttr(unsigned index, StringAttr name, Attribute value);
+    void setWorkgroupAttributionAttr(unsigned index, StringRef name, Attribute value) {
+      return setWorkgroupAttributionAttr(index, StringAttr::get((*this)->getContext(), name), value);
+    }
+
     /// Returns the number of buffers located in the private memory.
     unsigned getNumPrivateAttributions() {
       return getBody().getNumArguments() - getFunctionType().getNumInputs() -
           getNumWorkgroupAttributions();
     }
 
+    /// Returns the index of the first private buffer in the block argument list.
+    unsigned getFirstPrivateAttributionIndex() {
+      // Buffers on the private memory always come after buffers on the workgroup
+      // memory.
+      return getFunctionType().getNumInputs() + getNumWorkgroupAttributions();
+    }
+
     /// Returns a list of block arguments that correspond to buffers located in
     /// the private memory.
     ArrayRef<BlockArgument> getPrivateAttributions() {
-      // Buffers on the private memory always come after buffers on the workgroup
-      // memory.
       auto begin =
-          std::next(getBody().args_begin(),
-                    getFunctionType().getNumInputs() + getNumWorkgroupAttributions());
+          std::next(getBody().args_begin(), getFirstPrivateAttributionIndex());
       return {begin, getBody().args_end()};
     }
 
@@ -313,6 +348,29 @@ def GPU_GPUFuncOp : GPU_Op<"func", [
     /// private memory.
     BlockArgument addPrivateAttribution(Type type, Location loc);
 
+    /// Get the private attribution attribute dictionary for the attribution
+    /// at index `index`, counted from the start of the private attributions.
+    DictionaryAttr getPrivateAttributionAttrs(unsigned index);
+
+    /// Set the private attribution attribute dictionary for the attribution
+    /// at index `index`, counted from the start of the private attributions.
+    void setPrivateAttributionAttrs(unsigned index, DictionaryAttr value);
+
+    /// Get an attribute for a private attribution. `index` is counted
+    /// from the start of the private attributions, not the start of the block.
+    Attribute getPrivateAttributionAttr(unsigned index, StringAttr name);
+    Attribute getPrivateAttributionAttr(unsigned index, StringRef name) {
+      return getPrivateAttributionAttr(index, StringAttr::get((*this)->getContext(), name));
+    }
+
+    /// Set an attribute for a private attribution. `index` is counted
+    /// from the start of the private attributions, not the start of the block.
+    /// A null `value` removes an attribute.
+    void setPrivateAttributionAttr(unsigned index, StringAttr name, Attribute value);
+    void setPrivateAttributionAttr(unsigned index, StringRef name, Attribute value) {
+      return setPrivateAttributionAttr(index, StringAttr::get((*this)->getContext(), name), value);
+    }
+
     /// Returns the name of the attribute containing the number of buffers
     /// located in the workgroup memory.
     static StringRef getNumWorkgroupAttributionsAttrName() {

diff  --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
index 82c73b5f4dd2e..dceb83c249eae 100644
--- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
+++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
@@ -24,7 +24,7 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
   SmallVector<LLVM::GlobalOp, 3> workgroupBuffers;
   workgroupBuffers.reserve(gpuFuncOp.getNumWorkgroupAttributions());
   for (const auto &en : llvm::enumerate(gpuFuncOp.getWorkgroupAttributions())) {
-    Value attribution = en.value();
+    BlockArgument attribution = en.value();
 
     auto type = attribution.getType().dyn_cast<MemRefType>();
     assert(type && type.hasStaticShape() && "unexpected type in attribution");
@@ -36,10 +36,17 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
     auto arrayType = LLVM::LLVMArrayType::get(elementType, numElements);
     std::string name = std::string(
         llvm::formatv("__wg_{0}_{1}", gpuFuncOp.getName(), en.index()));
+    uint64_t alignment = 0;
+    if (auto alignAttr =
+            gpuFuncOp
+                .getWorkgroupAttributionAttr(
+                    en.index(), LLVM::LLVMDialect::getAlignAttrName())
+                .dyn_cast_or_null<IntegerAttr>())
+      alignment = alignAttr.getInt();
     auto globalOp = rewriter.create<LLVM::GlobalOp>(
         gpuFuncOp.getLoc(), arrayType, /*isConstant=*/false,
-        LLVM::Linkage::Internal, name, /*value=*/Attribute(),
-        /*alignment=*/0, workgroupAddrSpace);
+        LLVM::Linkage::Internal, name, /*value=*/Attribute(), alignment,
+        workgroupAddrSpace);
     workgroupBuffers.push_back(globalOp);
   }
 
@@ -56,7 +63,10 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
   for (const auto &attr : gpuFuncOp->getAttrs()) {
     if (attr.getName() == SymbolTable::getSymbolAttrName() ||
         attr.getName() == gpuFuncOp.getFunctionTypeAttrName() ||
-        attr.getName() == gpu::GPUFuncOp::getNumWorkgroupAttributionsAttrName())
+        attr.getName() ==
+            gpu::GPUFuncOp::getNumWorkgroupAttributionsAttrName() ||
+        attr.getName() == gpuFuncOp.getWorkgroupAttribAttrsAttrName() ||
+        attr.getName() == gpuFuncOp.getPrivateAttribAttrsAttrName())
       continue;
     attributes.push_back(attr);
   }
@@ -124,9 +134,15 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
           getTypeConverter()->getPointerType(elementType, allocaAddrSpace);
       Value numElements = rewriter.create<LLVM::ConstantOp>(
           gpuFuncOp.getLoc(), int64Ty, type.getNumElements());
+      uint64_t alignment = 0;
+      if (auto alignAttr =
+              gpuFuncOp
+                  .getPrivateAttributionAttr(
+                      en.index(), LLVM::LLVMDialect::getAlignAttrName())
+                  .dyn_cast_or_null<IntegerAttr>())
+        alignment = alignAttr.getInt();
       Value allocated = rewriter.create<LLVM::AllocaOp>(
-          gpuFuncOp.getLoc(), ptrType, elementType, numElements,
-          /*alignment=*/0);
+          gpuFuncOp.getLoc(), ptrType, elementType, numElements, alignment);
       auto descr = MemRefDescriptor::fromStaticShape(
           rewriter, loc, *getTypeConverter(), type, allocated);
       signatureConversion.remapInput(

diff  --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index 3ce6083c1f009..4c188d3a29263 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -1017,6 +1017,49 @@ void GPUFuncOp::build(OpBuilder &builder, OperationState &result,
   body->getBlocks().push_back(entryBlock);
 }
 
+/// Parses a GPU function memory attribution.
+///
+/// memory-attribution ::= (`workgroup` `(` ssa-id-and-type-list `)`)?
+///                        (`private` `(` ssa-id-and-type-list `)`)?
+///
+/// Note that this function parses only one of the two similar parts, with the
+/// keyword provided as argument.
+static ParseResult
+parseAttributions(OpAsmParser &parser, StringRef keyword,
+                  SmallVectorImpl<OpAsmParser::Argument> &args,
+                  Attribute &attributionAttrs) {
+  // If we could not parse the keyword, just assume empty list and succeed.
+  if (failed(parser.parseOptionalKeyword(keyword)))
+    return success();
+
+  size_t existingArgs = args.size();
+  ParseResult result =
+      parser.parseArgumentList(args, OpAsmParser::Delimiter::Paren,
+                               /*allowType=*/true, /*allowAttrs=*/true);
+  if (failed(result))
+    return result;
+
+  bool hadAttrs = llvm::any_of(ArrayRef(args).drop_front(existingArgs),
+                               [](const OpAsmParser::Argument &arg) -> bool {
+                                 return arg.attrs && !arg.attrs.empty();
+                               });
+  if (!hadAttrs) {
+    attributionAttrs = nullptr;
+    return result;
+  }
+
+  Builder &builder = parser.getBuilder();
+  SmallVector<Attribute> attributionAttrsVec;
+  for (const auto &argument : ArrayRef(args).drop_front(existingArgs)) {
+    if (!argument.attrs)
+      attributionAttrsVec.push_back(builder.getDictionaryAttr({}));
+    else
+      attributionAttrsVec.push_back(argument.attrs);
+  }
+  attributionAttrs = builder.getArrayAttr(attributionAttrsVec);
+  return result;
+}
+
 /// Parses a GPU function.
 ///
 /// <operation> ::= `gpu.func` symbol-ref-id `(` argument-list `)`
@@ -1059,9 +1102,10 @@ ParseResult GPUFuncOp::parse(OpAsmParser &parser, OperationState &result) {
       builder, result, entryArgs, resultAttrs, getArgAttrsAttrName(result.name),
       getResAttrsAttrName(result.name));
 
+  Attribute workgroupAttributionAttrs;
   // Parse workgroup memory attributions.
   if (failed(parseAttributions(parser, GPUFuncOp::getWorkgroupKeyword(),
-                               entryArgs)))
+                               entryArgs, workgroupAttributionAttrs)))
     return failure();
 
   // Store the number of operands we just parsed as the number of workgroup
@@ -1069,11 +1113,18 @@ ParseResult GPUFuncOp::parse(OpAsmParser &parser, OperationState &result) {
   unsigned numWorkgroupAttrs = entryArgs.size() - type.getNumInputs();
   result.addAttribute(GPUFuncOp::getNumWorkgroupAttributionsAttrName(),
                       builder.getI64IntegerAttr(numWorkgroupAttrs));
+  if (workgroupAttributionAttrs)
+    result.addAttribute(GPUFuncOp::getWorkgroupAttribAttrsAttrName(result.name),
+                        workgroupAttributionAttrs);
 
+  Attribute privateAttributionAttrs;
   // Parse private memory attributions.
-  if (failed(
-          parseAttributions(parser, GPUFuncOp::getPrivateKeyword(), entryArgs)))
+  if (failed(parseAttributions(parser, GPUFuncOp::getPrivateKeyword(),
+                               entryArgs, privateAttributionAttrs)))
     return failure();
+  if (privateAttributionAttrs)
+    result.addAttribute(GPUFuncOp::getPrivateAttribAttrsAttrName(result.name),
+                        privateAttributionAttrs);
 
   // Parse the kernel attribute if present.
   if (succeeded(parser.parseOptionalKeyword(GPUFuncOp::getKernelKeyword())))
@@ -1090,6 +1141,28 @@ ParseResult GPUFuncOp::parse(OpAsmParser &parser, OperationState &result) {
   return parser.parseRegion(*body, entryArgs);
 }
 
+static void printAttributions(OpAsmPrinter &p, StringRef keyword,
+                              ArrayRef<BlockArgument> values,
+                              ArrayAttr attributes) {
+  if (values.empty())
+    return;
+
+  p << ' ' << keyword << '(';
+  llvm::interleaveComma(
+      llvm::enumerate(values), p, [&p, attributes](auto pair) {
+        BlockArgument v = pair.value();
+        p << v << " : " << v.getType();
+
+        size_t attributionIndex = pair.index();
+        DictionaryAttr attrs;
+        if (attributes && attributionIndex < attributes.size())
+          attrs = attributes[attributionIndex].cast<DictionaryAttr>();
+        if (attrs)
+          p.printOptionalAttrDict(attrs.getValue());
+      });
+  p << ')';
+}
+
 void GPUFuncOp::print(OpAsmPrinter &p) {
   p << ' ';
   p.printSymbolName(getName());
@@ -1099,8 +1172,10 @@ void GPUFuncOp::print(OpAsmPrinter &p) {
                                                   /*isVariadic=*/false,
                                                   type.getResults());
 
-  printAttributions(p, getWorkgroupKeyword(), getWorkgroupAttributions());
-  printAttributions(p, getPrivateKeyword(), getPrivateAttributions());
+  printAttributions(p, getWorkgroupKeyword(), getWorkgroupAttributions(),
+                    getWorkgroupAttribAttrs().value_or(nullptr));
+  printAttributions(p, getPrivateKeyword(), getPrivateAttributions(),
+                    getPrivateAttribAttrs().value_or(nullptr));
   if (isKernel())
     p << ' ' << getKernelKeyword();
 
@@ -1108,11 +1183,130 @@ void GPUFuncOp::print(OpAsmPrinter &p) {
       p, *this,
       {getNumWorkgroupAttributionsAttrName(),
        GPUDialect::getKernelFuncAttrName(), getFunctionTypeAttrName(),
-       getArgAttrsAttrName(), getResAttrsAttrName()});
+       getArgAttrsAttrName(), getResAttrsAttrName(),
+       getWorkgroupAttribAttrsAttrName(), getPrivateAttribAttrsAttrName()});
   p << ' ';
   p.printRegion(getBody(), /*printEntryBlockArgs=*/false);
 }
 
+static DictionaryAttr getAttributionAttrs(GPUFuncOp op, unsigned index,
+                                          StringAttr attrName) {
+  auto allAttrs = op->getAttr(attrName).dyn_cast_or_null<ArrayAttr>();
+  if (!allAttrs || index >= allAttrs.size())
+    return DictionaryAttr();
+  return allAttrs[index].cast<DictionaryAttr>();
+}
+
+DictionaryAttr GPUFuncOp::getworkgroupAttributionAttrs(unsigned index) {
+  return getAttributionAttrs(*this, index, getWorkgroupAttribAttrsAttrName());
+}
+
+DictionaryAttr GPUFuncOp::getPrivateAttributionAttrs(unsigned index) {
+  return getAttributionAttrs(*this, index, getPrivateAttribAttrsAttrName());
+}
+
+static void setAttributionAttrs(GPUFuncOp op, unsigned index,
+                                DictionaryAttr value, StringAttr attrName) {
+  MLIRContext *ctx = op.getContext();
+  auto allAttrs = op->getAttr(attrName).dyn_cast_or_null<ArrayAttr>();
+  SmallVector<Attribute> elements;
+  if (allAttrs)
+    elements.append(allAttrs.begin(), allAttrs.end());
+  while (elements.size() <= index)
+    elements.push_back(DictionaryAttr::get(ctx));
+  if (!value)
+    elements[index] = DictionaryAttr::get(ctx);
+  else
+    elements[index] = value;
+  ArrayAttr newValue = ArrayAttr::get(ctx, elements);
+  op->setAttr(attrName, newValue);
+}
+
+void GPUFuncOp::setworkgroupAttributionAttrs(unsigned index,
+                                             DictionaryAttr value) {
+  setAttributionAttrs(*this, index, value, getWorkgroupAttribAttrsAttrName());
+}
+
+void GPUFuncOp::setPrivateAttributionAttrs(unsigned int index,
+                                           DictionaryAttr value) {
+  setAttributionAttrs(*this, index, value, getPrivateAttribAttrsAttrName());
+}
+
+static Attribute getAttributionAttr(GPUFuncOp op, unsigned index,
+                                    StringAttr name, StringAttr attrsName) {
+  DictionaryAttr dict = getAttributionAttrs(op, index, attrsName);
+  if (!dict)
+    return Attribute();
+  return dict.get(name);
+}
+
+Attribute GPUFuncOp::getWorkgroupAttributionAttr(unsigned index,
+                                                 StringAttr name) {
+  assert(index < getNumWorkgroupAttributions() &&
+         "index must map to a workgroup attribution");
+  return getAttributionAttr(*this, index, name,
+                            getWorkgroupAttribAttrsAttrName());
+}
+
+Attribute GPUFuncOp::getPrivateAttributionAttr(unsigned index,
+                                               StringAttr name) {
+  assert(index < getNumPrivateAttributions() &&
+         "index must map to a private attribution");
+  return getAttributionAttr(*this, index, name,
+                            getPrivateAttribAttrsAttrName());
+}
+
+static void setAttributionAttr(GPUFuncOp op, unsigned index, StringAttr name,
+                               Attribute value, StringAttr attrsName) {
+  MLIRContext *ctx = op.getContext();
+  SmallVector<NamedAttribute> elems;
+  DictionaryAttr oldDict = getAttributionAttrs(op, index, attrsName);
+  if (oldDict)
+    elems.append(oldDict.getValue().begin(), oldDict.getValue().end());
+
+  bool found = false;
+  bool mustSort = true;
+  for (unsigned i = 0, e = elems.size(); i < e; ++i) {
+    if (elems[i].getName() == name) {
+      found = true;
+      if (!value) {
+        std::swap(elems[i], elems[elems.size() - 1]);
+        elems.pop_back();
+      } else {
+        mustSort = false;
+        elems[i] = NamedAttribute(elems[i].getName(), value);
+      }
+      break;
+    }
+  }
+  if (!found) {
+    if (!value)
+      return;
+    elems.emplace_back(name, value);
+  }
+  if (mustSort) {
+    DictionaryAttr::sortInPlace(elems);
+  }
+  auto newDict = DictionaryAttr::getWithSorted(ctx, elems);
+  setAttributionAttrs(op, index, newDict, attrsName);
+}
+
+void GPUFuncOp::setWorkgroupAttributionAttr(unsigned index, StringAttr name,
+                                            Attribute value) {
+  assert(index < getNumWorkgroupAttributions() &&
+         "index must map to a workgroup attribution");
+  setAttributionAttr(*this, index, name, value,
+                     getWorkgroupAttribAttrsAttrName());
+}
+
+void GPUFuncOp::setPrivateAttributionAttr(unsigned index, StringAttr name,
+                                          Attribute value) {
+  assert(index < getNumPrivateAttributions() &&
+         "index must map to a private attribution");
+  setAttributionAttr(*this, index, name, value,
+                     getPrivateAttribAttrsAttrName());
+}
+
 LogicalResult GPUFuncOp::verifyType() {
   if (isKernel() && getFunctionType().getNumResults() != 0)
     return emitOpError() << "expected void return type for kernel function";

diff  --git a/mlir/test/Conversion/GPUCommon/memory-attrbution.mlir b/mlir/test/Conversion/GPUCommon/memory-attrbution.mlir
index 2762019794d1a..2457cbcf3251c 100644
--- a/mlir/test/Conversion/GPUCommon/memory-attrbution.mlir
+++ b/mlir/test/Conversion/GPUCommon/memory-attrbution.mlir
@@ -225,3 +225,34 @@ gpu.module @kernel {
     "terminator"() : () -> ()
   }
 }
+
+// -----
+
+gpu.module @kernel {
+  // Check that alignment attributes are set correctly
+  // NVVM: llvm.mlir.global internal @[[$buffer:.*]]()
+  // NVVM-SAME:  addr_space = 3
+  // NVVM-SAME:  alignment = 8
+  // NVVM-SAME:  !llvm.array<48 x f32>
+
+  // ROCDL: llvm.mlir.global internal @[[$buffer:.*]]()
+  // ROCDL-SAME:  addr_space = 3
+  // ROCDL-SAME:  alignment = 8
+  // ROCDL-SAME:  !llvm.array<48 x f32>
+
+  // NVVM-LABEL: llvm.func @explicitAlign
+  // ROCDL-LABEL: llvm.func @explicitAlign
+  gpu.func @explicitAlign(%arg0 : index)
+    workgroup(%arg1: memref<48xf32, #gpu.address_space<workgroup>> {llvm.align = 8 : i64})
+    private(%arg2: memref<48xf32, #gpu.address_space<private>> {llvm.align = 4 : i64}) {
+    // NVVM: %[[size:.*]] = llvm.mlir.constant(48 : i64) : i64
+    // NVVM: %[[raw:.*]] = llvm.alloca %[[size]] x f32 {alignment = 4 : i64} : (i64) -> !llvm.ptr
+
+    // ROCDL: %[[size:.*]] = llvm.mlir.constant(48 : i64) : i64
+    // ROCDL: %[[raw:.*]] = llvm.alloca %[[size]] x f32 {alignment = 4 : i64} : (i64) -> !llvm.ptr<5>
+
+    %val = memref.load %arg1[%arg0] : memref<48xf32, #gpu.address_space<workgroup>>
+    memref.store %val, %arg2[%arg0] : memref<48xf32, #gpu.address_space<private>>
+    "terminator"() : () -> ()
+  }
+}


        


More information about the Mlir-commits mailing list