[llvm] [NVPTX][NFC] Move more TMA lowering to tablegen (PR #140914)
Alex MacLean via llvm-commits
llvm-commits at lists.llvm.org
Thu May 22 10:05:52 PDT 2025
================
@@ -547,49 +547,47 @@ multiclass CP_ASYNC_BULK_S2G_INTR<bit has_ch> {
defm CP_ASYNC_BULK_S2G : CP_ASYNC_BULK_S2G_INTR<0>;
defm CP_ASYNC_BULK_S2G_CH : CP_ASYNC_BULK_S2G_INTR<1>;
-multiclass CP_ASYNC_BULK_G2S<NVPTXRegClass rc> {
- def NAME: NVPTXInst<(outs),
- (ins rc:$dst, rc:$mbar, Int64Regs:$src, Int32Regs:$size),
- !strconcat(CpAsyncBulkStr<0, 0>.G2S, " [$dst], [$src], $size, [$mbar];"), []>,
- Requires<[hasPTX<80>, hasSM<90>]>;
- def NAME # _MC: NVPTXInst<(outs),
- (ins rc:$dst, rc:$mbar, Int64Regs:$src, Int32Regs:$size, Int16Regs:$mc),
- !strconcat(CpAsyncBulkStr<1, 0>.G2S, " [$dst], [$src], $size, [$mbar], $mc;"), []>,
- Requires<[hasPTX<80>, hasSM<90>]>;
- def NAME # _CH: NVPTXInst<(outs),
- (ins rc:$dst, rc:$mbar, Int64Regs:$src, Int32Regs:$size, Int64Regs:$ch),
- !strconcat(CpAsyncBulkStr<0, 1>.G2S, " [$dst], [$src], $size, [$mbar], $ch;"), []>,
- Requires<[hasPTX<80>, hasSM<90>]>;
- def NAME # _MC_CH: NVPTXInst<(outs),
- (ins rc:$dst, rc:$mbar, Int64Regs:$src, Int32Regs:$size, Int16Regs:$mc, Int64Regs:$ch),
- !strconcat(CpAsyncBulkStr<1, 1>.G2S, " [$dst], [$src], $size, [$mbar], $mc, $ch;"), []>,
- Requires<[hasPTX<80>, hasSM<90>]>;
+multiclass CP_ASYNC_BULK_G2S_INTR<bit has_ch> {
+ defvar Intr = int_nvvm_cp_async_bulk_global_to_shared_cluster;
+
+ def NAME : NVPTXInst<(outs),
+ (ins ADDR:$dst, ADDR:$mbar, ADDR:$src,
+ Int32Regs:$size, Int16Regs:$mask, Int64Regs:$ch),
+ !if(has_ch,
+ CpAsyncBulkStr<0, 1>.G2S # " [$dst], [$src], $size, [$mbar], $ch;",
+ CpAsyncBulkStr<0, 0>.G2S # " [$dst], [$src], $size, [$mbar];"),
+ [(Intr addr:$dst, addr:$mbar, addr:$src, i32:$size, i16:$mask, i64:$ch, 0, !if(has_ch, -1, 0))]>,
+ Requires<[hasPTX<80>, hasSM<90>]>;
+
+ def NAME # _MC : NVPTXInst<(outs),
+ (ins ADDR:$dst, ADDR:$mbar, ADDR:$src,
+ Int32Regs:$size, Int16Regs:$mask, Int64Regs:$ch),
+ !if(has_ch,
+ CpAsyncBulkStr<1, 1>.G2S # " [$dst], [$src], $size, [$mbar], $mask, $ch;",
+ CpAsyncBulkStr<1, 0>.G2S # " [$dst], [$src], $size, [$mbar], $mask;"),
+ [(Intr addr:$dst, addr:$mbar, addr:$src, i32:$size, i16:$mask, i64:$ch, -1, !if(has_ch, -1, 0))]>,
+ Requires<[hasPTX<80>, hasSM<90>]>;
}
-defm CP_ASYNC_BULK_G2S : CP_ASYNC_BULK_G2S<Int64Regs>;
-defm CP_ASYNC_BULK_G2S_SHARED32 : CP_ASYNC_BULK_G2S<Int32Regs>;
+defm CP_ASYNC_BULK_G2S : CP_ASYNC_BULK_G2S_INTR<0>;
+defm CP_ASYNC_BULK_G2S_CH : CP_ASYNC_BULK_G2S_INTR<1>;
-multiclass CP_ASYNC_BULK_CTA_TO_CLUSTER<NVPTXRegClass rc> {
- def NAME: NVPTXInst<(outs),
- (ins rc:$dst, rc:$mbar, rc:$src, Int32Regs:$size),
- !strconcat(CpAsyncBulkStr<0, 0>.C2C, " [$dst], [$src], $size, [$mbar];"),
- [(int_nvvm_cp_async_bulk_shared_cta_to_cluster rc:$dst, rc:$mbar, rc:$src, Int32Regs:$size)]>,
- Requires<[hasPTX<80>, hasSM<90>]>;
+def CP_ASYNC_BULK_CTA_TO_CLUSTER : NVPTXInst<(outs),
+ (ins ADDR:$dst, ADDR:$mbar, ADDR:$src, Int32Regs:$size),
+ !strconcat(CpAsyncBulkStr<0, 0>.C2C, " [$dst], [$src], $size, [$mbar];"),
----------------
AlexMaclean wrote:
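Nit: use the `#` paste operator instead of `!strconcat` for this string concatenation, consistent with the `CP_ASYNC_BULK_G2S_INTR` definitions above.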
https://github.com/llvm/llvm-project/pull/140914
More information about the llvm-commits mailing list