[Mlir-commits] [mlir] [mlir][ArmSME] Support vertical layout in load and store ops (PR #66758)
Andrzej Warzyński
llvmlistbot at llvm.org
Mon Sep 25 00:55:10 PDT 2023
================
@@ -204,30 +204,59 @@ struct LoadTileSliceToArmSMELowering
auto allActiveMask = rewriter.create<vector::SplatOp>(loc, predTy, one);
auto tileI32 = castTileIDToI32(tile, loc, rewriter);
- // Create 'arm_sme.intr.ld1*.horiz' intrinsic to load ZA tile slice.
- switch (tileElementWidth) {
- default:
- llvm_unreachable("unexpected element type!");
- case 8:
- rewriter.create<arm_sme::aarch64_sme_ld1b_horiz>(loc, allActiveMask, ptr,
- tileI32, tileSliceI32);
- break;
- case 16:
- rewriter.create<arm_sme::aarch64_sme_ld1h_horiz>(loc, allActiveMask, ptr,
- tileI32, tileSliceI32);
- break;
- case 32:
- rewriter.create<arm_sme::aarch64_sme_ld1w_horiz>(loc, allActiveMask, ptr,
- tileI32, tileSliceI32);
- break;
- case 64:
- rewriter.create<arm_sme::aarch64_sme_ld1d_horiz>(loc, allActiveMask, ptr,
- tileI32, tileSliceI32);
- break;
- case 128:
- rewriter.create<arm_sme::aarch64_sme_ld1q_horiz>(loc, allActiveMask, ptr,
- tileI32, tileSliceI32);
- break;
+ arm_sme::TileSliceLayout layout = loadTileSliceOp.getLayout();
+
+ // Create 'arm_sme.intr.ld1*.(horiz|vert)' intrinsic to load ZA tile slice.
+ if (layout == arm_sme::TileSliceLayout::Horizontal) {
+ switch (tileElementWidth) {
+ default:
+ llvm_unreachable("unexpected element type!");
+ case 8:
+ rewriter.create<arm_sme::aarch64_sme_ld1b_horiz>(
+ loc, allActiveMask, ptr, tileI32, tileSliceI32);
+ break;
+ case 16:
+ rewriter.create<arm_sme::aarch64_sme_ld1h_horiz>(
+ loc, allActiveMask, ptr, tileI32, tileSliceI32);
+ break;
+ case 32:
+ rewriter.create<arm_sme::aarch64_sme_ld1w_horiz>(
+ loc, allActiveMask, ptr, tileI32, tileSliceI32);
+ break;
+ case 64:
+ rewriter.create<arm_sme::aarch64_sme_ld1d_horiz>(
+ loc, allActiveMask, ptr, tileI32, tileSliceI32);
+ break;
+ case 128:
+ rewriter.create<arm_sme::aarch64_sme_ld1q_horiz>(
+ loc, allActiveMask, ptr, tileI32, tileSliceI32);
+ break;
----------------
banach-space wrote:
This is the only thing that came to my mind:
```cpp
template <int N>
void callLoadIntrinsic(ConversionPatternRewriter &rewriter,
arm_sme::StoreTileSliceOp storeTileSliceOp,
mlir::vector::SplatOp allActiveMask, Value ptr, Value tileI32,
mlir::arith::IndexCastUIOp tileSliceI32) {
}
template <>
void callLoadIntrinsic<8>(ConversionPatternRewriter &rewriter,
arm_sme::StoreTileSliceOp storeTileSliceOp,
mlir::vector::SplatOp allActiveMask, Value ptr,
Value tileI32,
mlir::arith::IndexCastUIOp tileSliceI32) {
rewriter.replaceOpWithNewOp<arm_sme::aarch64_sme_st1b_horiz>(
storeTileSliceOp, allActiveMask, ptr, tileI32, tileSliceI32);
}
```
But that wouldn't be an improvement IMHO, so I'd go ahead with what you have here already.
https://github.com/llvm/llvm-project/pull/66758
More information about the Mlir-commits
mailing list