[Mlir-commits] [mlir] [mlir][ArmSME] Add rudimentary support for tile spills to the stack (PR #76086)

Cullen Rhodes llvmlistbot at llvm.org
Fri Dec 22 00:54:35 PST 2023


================
@@ -129,6 +132,221 @@ IntegerAttr getTileIdOrError(arm_sme::ArmSMETileOpInterface op) {
   return tileId;
 }
 
+/// Creates an alloca matching the size of tile used by `tileOp`. The alloca is
+/// placed in the first block of the function.
+static memref::AllocaOp
+createAllocaForTile(RewriterBase &rewriter, Location loc,
+                    FunctionOpInterface func,
+                    arm_sme::ArmSMETileOpInterface tileOp) {
+  RewriterBase::InsertionGuard g(rewriter);
+  // Move to the first operation in the function.
+  rewriter.setInsertionPointToStart(&func.getBlocks().front());
+  // Create an alloca matching the tile size of the `tileOp`.
+  auto vscale = rewriter.create<vector::VectorScaleOp>(loc);
+  auto tileElementType = tileOp.getTileType().getElementType();
+  auto memrefType = MemRefType::get(
+      {ShapedType::kDynamic, ShapedType::kDynamic}, tileElementType);
+  unsigned minElements = arm_sme::getSMETileSliceMinNumElts(tileElementType);
+  auto minElementsOp =
+      rewriter.create<arith::ConstantIndexOp>(loc, minElements);
+  auto vectorLen = rewriter.create<arith::MulIOp>(loc, vscale, minElementsOp);
+  auto alloca = rewriter.create<memref::AllocaOp>(
+      loc, memrefType, ValueRange{vectorLen, vectorLen});
+  return alloca;
+}
+
+/// Finds or creates an alloca for a spill of a tile.
+static memref::AllocaOp getOrCreateAllocaForTile(
+    RewriterBase &rewriter, Location loc, FunctionOpInterface func,
+    arm_sme::ArmSMETileOpInterface tileOp, unsigned tileId) {
+  // Find an alloca at the top of the function tagged with a
+  // 'arm_sme.in_memory_tile_id' that matches `tileId`.
+  for (auto &op : func.getBlocks().front()) {
+    auto alloca = llvm::dyn_cast<memref::AllocaOp>(op);
+    if (!alloca)
+      continue;
+    auto inMemoryTileId = llvm::dyn_cast_or_null<IntegerAttr>(
+        alloca->getDiscardableAttr(kInMemoryTileIdAttr));
+    if (!inMemoryTileId)
+      continue;
+    if (inMemoryTileId.getInt() == tileId)
+      return alloca;
+  }
+  // Otherwise, create a new alloca:
+  auto alloca = createAllocaForTile(rewriter, loc, func, tileOp);
+  alloca->setDiscardableAttr(kInMemoryTileIdAttr,
+                             rewriter.getI32IntegerAttr(tileId));
+  return alloca;
+}
+
+/// Very naive lowering of in-memory tiles (i.e. tiles that were not assigned a
+/// hardware tile ID) to ArmSME intrinsics. Currently, this works by assigning
+/// the op to tile 0, then emitting a full tile swap between ZA and memory
+/// before + after the tile op.
+///
+/// Example:
+///
+///    // Note: <IN MEMORY TILE> = tile ID >= 16.
+///    arm_sme.tile_op { tile_id = <IN MEMORY TILE> }
+///
+/// is converted to:
+///     // At function entry:
+///     %spill = memref.alloca ... : memref<?x?xty>
+///
+///     // Around op:
+///     scf.for %slice_idx {
+///       %slice_to_save = "arm_sme.intr.read.horiz" ... <{tile_id = 0 : i32}>
+///       "arm_sme.intr.ld1h.horiz"(%spill, %slice_idx)  <{tile_id = 0 : i32}>
+///       vector.store %slice_to_save, %spill[%slice_idx, %c0]
+///     }
+///     arm_sme.tile_op { tile_id = 0 }
+///     scf.for %slice_idx {
+///       %slice_to_save = "arm_sme.intr.read.horiz" ... <{tile_id = 0 : i32}>
+///       "arm_sme.intr.ld1h.horiz"(%spill, %slice_idx)  <{tile_id = 0 : i32}>
+///       vector.store %slice_to_save, %spill[%slice_idx, %c0]
+///     }
+///
+/// Note that these spills/fills are not inserted earlier as concept of a
+/// register, and the need to swap the contents, can't really be represented
+/// correctly at a high level in MLIR.
----------------
c-rhodes wrote:

could this be done as a separate transform pass (like TileAllocation.cpp) that's run before `-arm-sme-to-llvm`? It feels a bit odd to do this during conversion, especially since this has to be registered for specific ops (unlike greedy rewriter):
```
  // Register ops that need spills/fills.
  addSpillAndFillsForTileOp<
      arm_sme::LoadTileSliceOp, arm_sme::MoveTileSliceToVectorOp,
      arm_sme::MoveVectorToTileSliceOp, arm_sme::StoreTileSliceOp,
      arm_sme::OuterProductOp, arm_sme::ZeroOp>(patterns, converter);
```

and we must ensure new ops that also need spills/fills are added here, which could easily be missed?

https://github.com/llvm/llvm-project/pull/76086


More information about the Mlir-commits mailing list